/* thumb2-mlkem-asm
 *
 * Copyright (C) 2006-2025 wolfSSL Inc.
 *
 * This file is part of wolfSSL.
 *
 * wolfSSL is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * wolfSSL is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
 */

/* Generated using (from wolfssl):
 *   cd ../scripts
 *   ruby ./kyber/kyber.rb \
 *       thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-mlkem-asm.c
 */

#include <wolfssl/wolfcrypt/libwolfssl_sources_asm.h>
#include <wolfssl/wolfcrypt/error-crypt.h>

#ifdef WOLFSSL_ARMASM
#ifdef WOLFSSL_ARMASM_THUMB2
#ifdef WOLFSSL_ARMASM_INLINE

/* Compiler compatibility for the inline assembly used in this file.
 *
 * The assembly blocks are written with the GNU-style spellings
 * __asm__ and __volatile__. The macros below map those spellings onto the
 * keywords chosen for the IAR and Keil toolchains.
 */
#ifdef __IAR_SYSTEMS_ICC__
#define __asm__        asm
#define __volatile__   volatile
/* Selects the function variants further down that take their arguments
 * directly instead of binding local variables to named registers with
 * `register ... __asm__ ("rN")`. */
#define WOLFSSL_NO_VAR_ASSIGN_REG
#endif /* __IAR_SYSTEMS_ICC__ */
#ifdef __KEIL__
#define __asm__        __asm
#define __volatile__   volatile
#endif /* __KEIL__ */
#include <wolfssl/wolfcrypt/wc_mlkem.h>

#ifdef WOLFSSL_WC_MLKEM
/* Table of 128 16-bit constants read by mlkem_thumb2_ntt() below.
 *
 * The NTT assembly holds the address of this table in r1 and fetches its
 * butterfly multipliers from it, either one entry at a time (LDRH) or two
 * adjacent entries at once as a single 32-bit word (LDR), in which case the
 * lower-indexed entry is used from the bottom halfword and the next entry
 * from the top halfword.
 *
 * XALIGNED(16) requests 16-byte alignment for the table.
 *
 * NOTE(review): the values appear to be the ML-KEM NTT zetas (twiddle
 * factors modulo q = 3329 = 0xd01) stored in Montgomery form - confirm
 * against the generator script before relying on this. This file is
 * generated; change the table via the generator rather than by hand.
 */
XALIGNED(16) static const word16 L_mlkem_thumb2_ntt_zetas[] = {
    0x08ed, 0x0a0b, 0x0b9a, 0x0714, 0x05d5, 0x058e, 0x011f, 0x00ca,
    0x0c56, 0x026e, 0x0629, 0x00b6, 0x03c2, 0x084f, 0x073f, 0x05bc,
    0x023d, 0x07d4, 0x0108, 0x017f, 0x09c4, 0x05b2, 0x06bf, 0x0c7f,
    0x0a58, 0x03f9, 0x02dc, 0x0260, 0x06fb, 0x019b, 0x0c34, 0x06de,
    0x04c7, 0x028c, 0x0ad9, 0x03f7, 0x07f4, 0x05d3, 0x0be7, 0x06f9,
    0x0204, 0x0cf9, 0x0bc1, 0x0a67, 0x06af, 0x0877, 0x007e, 0x05bd,
    0x09ac, 0x0ca7, 0x0bf2, 0x033e, 0x006b, 0x0774, 0x0c0a, 0x094a,
    0x0b73, 0x03c1, 0x071d, 0x0a2c, 0x01c0, 0x08d8, 0x02a5, 0x0806,
    0x08b2, 0x01ae, 0x022b, 0x034b, 0x081e, 0x0367, 0x060e, 0x0069,
    0x01a6, 0x024b, 0x00b1, 0x0c16, 0x0bde, 0x0b35, 0x0626, 0x0675,
    0x0c0b, 0x030a, 0x0487, 0x0c6e, 0x09f8, 0x05cb, 0x0aa7, 0x045f,
    0x06cb, 0x0284, 0x0999, 0x015d, 0x01a2, 0x0149, 0x0c65, 0x0cb6,
    0x0331, 0x0449, 0x025b, 0x0262, 0x052a, 0x07fc, 0x0748, 0x0180,
    0x0842, 0x0c79, 0x04c2, 0x07ca, 0x0997, 0x00dc, 0x085e, 0x0686,
    0x0860, 0x0707, 0x0803, 0x031a, 0x071b, 0x09ab, 0x099b, 0x01de,
    0x0c95, 0x0bcd, 0x03e4, 0x03df, 0x03be, 0x074d, 0x05f2, 0x065c,
};

#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void mlkem_thumb2_ntt(sword16* r_p)
#else
void mlkem_thumb2_ntt(sword16* r)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register sword16* r __asm__ ("r0") = (sword16*)r_p;
    register word16* L_mlkem_thumb2_ntt_zetas_c __asm__ ("r1") =
        (word16*)&L_mlkem_thumb2_ntt_zetas;

#else
    register word16* L_mlkem_thumb2_ntt_zetas_c =
        (word16*)&L_mlkem_thumb2_ntt_zetas;

#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        "SUB	sp, sp, #0x8\n\t"
        "MOV	r1, %[L_mlkem_thumb2_ntt_zetas]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
        "MOV	r12, #0xd01\n\t"
        "MOVT	r12, #0xcff\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
        "MOV	r2, #0x10\n\t"
        "\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
    "L_mlkem_thumb2_ntt_loop_123:\n\t"
#else
    "L_mlkem_thumb2_ntt_loop_123_%=:\n\t"
#endif
        "STR	r2, [sp]\n\t"
        "LDRH	lr, [r1, #2]\n\t"
        "LDR	r2, [%[r]]\n\t"
        "LDR	r3, [%[r], #64]\n\t"
        "LDR	r4, [%[r], #128]\n\t"
        "LDR	r5, [%[r], #192]\n\t"
        "LDR	r6, [%[r], #256]\n\t"
        "LDR	r7, [%[r], #320]\n\t"
        "LDR	r8, [%[r], #384]\n\t"
        "LDR	r9, [%[r], #448]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULBB	r10, lr, r6\n\t"
        "SMULBT	r6, lr, r6\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r6\n\t"
        "SMLABB	r11, r12, r11, r6\n\t"
        "PKHTB	r10, r11, r10, ASR #16\n\t"
        "SSUB16	r6, r2, r10\n\t"
        "SADD16	r2, r2, r10\n\t"
#else
        "SBFX	r10, r6, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r6, r6, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MUL	r6, r11, r6\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r6, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r11, r12, r11, r6\n\t"
        "SUB	r6, r2, r11\n\t"
        "ADD	r2, r2, r11\n\t"
        "SUB	r11, r2, r10, LSR #16\n\t"
        "ADD	r10, r2, r10, LSR #16\n\t"
        "BFI	r6, r11, #0, #16\n\t"
        "BFI	r2, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULBB	r10, lr, r7\n\t"
        "SMULBT	r7, lr, r7\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r7\n\t"
        "SMLABB	r11, r12, r11, r7\n\t"
        "PKHTB	r10, r11, r10, ASR #16\n\t"
        "SSUB16	r7, r3, r10\n\t"
        "SADD16	r3, r3, r10\n\t"
#else
        "SBFX	r10, r7, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r7, r7, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MUL	r7, r11, r7\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r7, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r11, r12, r11, r7\n\t"
        "SUB	r7, r3, r11\n\t"
        "ADD	r3, r3, r11\n\t"
        "SUB	r11, r3, r10, LSR #16\n\t"
        "ADD	r10, r3, r10, LSR #16\n\t"
        "BFI	r7, r11, #0, #16\n\t"
        "BFI	r3, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULBB	r10, lr, r8\n\t"
        "SMULBT	r8, lr, r8\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r8\n\t"
        "SMLABB	r11, r12, r11, r8\n\t"
        "PKHTB	r10, r11, r10, ASR #16\n\t"
        "SSUB16	r8, r4, r10\n\t"
        "SADD16	r4, r4, r10\n\t"
#else
        "SBFX	r10, r8, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r8, r8, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MUL	r8, r11, r8\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r8, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r11, r12, r11, r8\n\t"
        "SUB	r8, r4, r11\n\t"
        "ADD	r4, r4, r11\n\t"
        "SUB	r11, r4, r10, LSR #16\n\t"
        "ADD	r10, r4, r10, LSR #16\n\t"
        "BFI	r8, r11, #0, #16\n\t"
        "BFI	r4, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULBB	r10, lr, r9\n\t"
        "SMULBT	r9, lr, r9\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r9\n\t"
        "SMLABB	r11, r12, r11, r9\n\t"
        "PKHTB	r10, r11, r10, ASR #16\n\t"
        "SSUB16	r9, r5, r10\n\t"
        "SADD16	r5, r5, r10\n\t"
#else
        "SBFX	r10, r9, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r9, r9, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MUL	r9, r11, r9\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r9, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r11, r12, r11, r9\n\t"
        "SUB	r9, r5, r11\n\t"
        "ADD	r5, r5, r11\n\t"
        "SUB	r11, r5, r10, LSR #16\n\t"
        "ADD	r10, r5, r10, LSR #16\n\t"
        "BFI	r9, r11, #0, #16\n\t"
        "BFI	r5, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
        "LDR	lr, [r1, #4]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULBB	r10, lr, r4\n\t"
        "SMULBT	r4, lr, r4\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r4\n\t"
        "SMLABB	r11, r12, r11, r4\n\t"
        "PKHTB	r10, r11, r10, ASR #16\n\t"
        "SSUB16	r4, r2, r10\n\t"
        "SADD16	r2, r2, r10\n\t"
#else
        "SBFX	r10, r4, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r4, r4, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MUL	r4, r11, r4\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r4, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r11, r12, r11, r4\n\t"
        "SUB	r4, r2, r11\n\t"
        "ADD	r2, r2, r11\n\t"
        "SUB	r11, r2, r10, LSR #16\n\t"
        "ADD	r10, r2, r10, LSR #16\n\t"
        "BFI	r4, r11, #0, #16\n\t"
        "BFI	r2, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULBB	r10, lr, r5\n\t"
        "SMULBT	r5, lr, r5\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r5\n\t"
        "SMLABB	r11, r12, r11, r5\n\t"
        "PKHTB	r10, r11, r10, ASR #16\n\t"
        "SSUB16	r5, r3, r10\n\t"
        "SADD16	r3, r3, r10\n\t"
#else
        "SBFX	r10, r5, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r5, r5, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MUL	r5, r11, r5\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r5, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r11, r12, r11, r5\n\t"
        "SUB	r5, r3, r11\n\t"
        "ADD	r3, r3, r11\n\t"
        "SUB	r11, r3, r10, LSR #16\n\t"
        "ADD	r10, r3, r10, LSR #16\n\t"
        "BFI	r5, r11, #0, #16\n\t"
        "BFI	r3, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULTB	r10, lr, r8\n\t"
        "SMULTT	r8, lr, r8\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r8\n\t"
        "SMLABB	r11, r12, r11, r8\n\t"
        "PKHTB	r10, r11, r10, ASR #16\n\t"
        "SSUB16	r8, r6, r10\n\t"
        "SADD16	r6, r6, r10\n\t"
#else
        "SBFX	r10, r8, #0, #16\n\t"
        "SBFX	r11, lr, #16, #16\n\t"
        "ASR	r8, r8, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MUL	r8, r11, r8\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r8, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r11, r12, r11, r8\n\t"
        "SUB	r8, r6, r11\n\t"
        "ADD	r6, r6, r11\n\t"
        "SUB	r11, r6, r10, LSR #16\n\t"
        "ADD	r10, r6, r10, LSR #16\n\t"
        "BFI	r8, r11, #0, #16\n\t"
        "BFI	r6, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULTB	r10, lr, r9\n\t"
        "SMULTT	r9, lr, r9\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r9\n\t"
        "SMLABB	r11, r12, r11, r9\n\t"
        "PKHTB	r10, r11, r10, ASR #16\n\t"
        "SSUB16	r9, r7, r10\n\t"
        "SADD16	r7, r7, r10\n\t"
#else
        "SBFX	r10, r9, #0, #16\n\t"
        "SBFX	r11, lr, #16, #16\n\t"
        "ASR	r9, r9, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MUL	r9, r11, r9\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r9, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r11, r12, r11, r9\n\t"
        "SUB	r9, r7, r11\n\t"
        "ADD	r7, r7, r11\n\t"
        "SUB	r11, r7, r10, LSR #16\n\t"
        "ADD	r10, r7, r10, LSR #16\n\t"
        "BFI	r9, r11, #0, #16\n\t"
        "BFI	r7, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
        "LDR	lr, [r1, #8]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULBB	r10, lr, r3\n\t"
        "SMULBT	r3, lr, r3\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r3\n\t"
        "SMLABB	r11, r12, r11, r3\n\t"
        "PKHTB	r10, r11, r10, ASR #16\n\t"
        "SSUB16	r3, r2, r10\n\t"
        "SADD16	r2, r2, r10\n\t"
#else
        "SBFX	r10, r3, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r3, r3, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MUL	r3, r11, r3\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r3, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r11, r12, r11, r3\n\t"
        "SUB	r3, r2, r11\n\t"
        "ADD	r2, r2, r11\n\t"
        "SUB	r11, r2, r10, LSR #16\n\t"
        "ADD	r10, r2, r10, LSR #16\n\t"
        "BFI	r3, r11, #0, #16\n\t"
        "BFI	r2, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULTB	r10, lr, r5\n\t"
        "SMULTT	r5, lr, r5\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r5\n\t"
        "SMLABB	r11, r12, r11, r5\n\t"
        "PKHTB	r10, r11, r10, ASR #16\n\t"
        "SSUB16	r5, r4, r10\n\t"
        "SADD16	r4, r4, r10\n\t"
#else
        "SBFX	r10, r5, #0, #16\n\t"
        "SBFX	r11, lr, #16, #16\n\t"
        "ASR	r5, r5, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MUL	r5, r11, r5\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r5, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r11, r12, r11, r5\n\t"
        "SUB	r5, r4, r11\n\t"
        "ADD	r4, r4, r11\n\t"
        "SUB	r11, r4, r10, LSR #16\n\t"
        "ADD	r10, r4, r10, LSR #16\n\t"
        "BFI	r5, r11, #0, #16\n\t"
        "BFI	r4, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
        "LDR	lr, [r1, #12]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULBB	r10, lr, r7\n\t"
        "SMULBT	r7, lr, r7\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r7\n\t"
        "SMLABB	r11, r12, r11, r7\n\t"
        "PKHTB	r10, r11, r10, ASR #16\n\t"
        "SSUB16	r7, r6, r10\n\t"
        "SADD16	r6, r6, r10\n\t"
#else
        "SBFX	r10, r7, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r7, r7, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MUL	r7, r11, r7\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r7, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r11, r12, r11, r7\n\t"
        "SUB	r7, r6, r11\n\t"
        "ADD	r6, r6, r11\n\t"
        "SUB	r11, r6, r10, LSR #16\n\t"
        "ADD	r10, r6, r10, LSR #16\n\t"
        "BFI	r7, r11, #0, #16\n\t"
        "BFI	r6, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULTB	r10, lr, r9\n\t"
        "SMULTT	r9, lr, r9\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r9\n\t"
        "SMLABB	r11, r12, r11, r9\n\t"
        "PKHTB	r10, r11, r10, ASR #16\n\t"
        "SSUB16	r9, r8, r10\n\t"
        "SADD16	r8, r8, r10\n\t"
#else
        "SBFX	r10, r9, #0, #16\n\t"
        "SBFX	r11, lr, #16, #16\n\t"
        "ASR	r9, r9, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MUL	r9, r11, r9\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r9, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r11, r12, r11, r9\n\t"
        "SUB	r9, r8, r11\n\t"
        "ADD	r8, r8, r11\n\t"
        "SUB	r11, r8, r10, LSR #16\n\t"
        "ADD	r10, r8, r10, LSR #16\n\t"
        "BFI	r9, r11, #0, #16\n\t"
        "BFI	r8, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
        "STR	r2, [%[r]]\n\t"
        "STR	r3, [%[r], #64]\n\t"
        "STR	r4, [%[r], #128]\n\t"
        "STR	r5, [%[r], #192]\n\t"
        "STR	r6, [%[r], #256]\n\t"
        "STR	r7, [%[r], #320]\n\t"
        "STR	r8, [%[r], #384]\n\t"
        "STR	r9, [%[r], #448]\n\t"
        "LDR	r2, [sp]\n\t"
        "SUBS	r2, r2, #0x1\n\t"
        "ADD	%[r], %[r], #0x4\n\t"
#if defined(__GNUC__)
        "BNE	L_mlkem_thumb2_ntt_loop_123_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BNE.N	L_mlkem_thumb2_ntt_loop_123\n\t"
#else
        "BNE.N	L_mlkem_thumb2_ntt_loop_123_%=\n\t"
#endif
        "SUB	%[r], %[r], #0x40\n\t"
        "MOV	r3, #0x0\n\t"
        "\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
    "L_mlkem_thumb2_ntt_loop_4_j:\n\t"
#else
    "L_mlkem_thumb2_ntt_loop_4_j_%=:\n\t"
#endif
        "STR	r3, [sp, #4]\n\t"
        "ADD	lr, r1, r3, LSR #4\n\t"
        "MOV	r2, #0x4\n\t"
        "LDR	lr, [lr, #16]\n\t"
        "\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
    "L_mlkem_thumb2_ntt_loop_4_i:\n\t"
#else
    "L_mlkem_thumb2_ntt_loop_4_i_%=:\n\t"
#endif
        "STR	r2, [sp]\n\t"
        "LDR	r2, [%[r]]\n\t"
        "LDR	r3, [%[r], #16]\n\t"
        "LDR	r4, [%[r], #32]\n\t"
        "LDR	r5, [%[r], #48]\n\t"
        "LDR	r6, [%[r], #64]\n\t"
        "LDR	r7, [%[r], #80]\n\t"
        "LDR	r8, [%[r], #96]\n\t"
        "LDR	r9, [%[r], #112]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULBB	r10, lr, r4\n\t"
        "SMULBT	r4, lr, r4\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r4\n\t"
        "SMLABB	r11, r12, r11, r4\n\t"
        "PKHTB	r10, r11, r10, ASR #16\n\t"
        "SSUB16	r4, r2, r10\n\t"
        "SADD16	r2, r2, r10\n\t"
#else
        "SBFX	r10, r4, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r4, r4, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MUL	r4, r11, r4\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r4, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r11, r12, r11, r4\n\t"
        "SUB	r4, r2, r11\n\t"
        "ADD	r2, r2, r11\n\t"
        "SUB	r11, r2, r10, LSR #16\n\t"
        "ADD	r10, r2, r10, LSR #16\n\t"
        "BFI	r4, r11, #0, #16\n\t"
        "BFI	r2, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULBB	r10, lr, r5\n\t"
        "SMULBT	r5, lr, r5\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r5\n\t"
        "SMLABB	r11, r12, r11, r5\n\t"
        "PKHTB	r10, r11, r10, ASR #16\n\t"
        "SSUB16	r5, r3, r10\n\t"
        "SADD16	r3, r3, r10\n\t"
#else
        "SBFX	r10, r5, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r5, r5, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MUL	r5, r11, r5\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r5, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r11, r12, r11, r5\n\t"
        "SUB	r5, r3, r11\n\t"
        "ADD	r3, r3, r11\n\t"
        "SUB	r11, r3, r10, LSR #16\n\t"
        "ADD	r10, r3, r10, LSR #16\n\t"
        "BFI	r5, r11, #0, #16\n\t"
        "BFI	r3, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULTB	r10, lr, r8\n\t"
        "SMULTT	r8, lr, r8\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r8\n\t"
        "SMLABB	r11, r12, r11, r8\n\t"
        "PKHTB	r10, r11, r10, ASR #16\n\t"
        "SSUB16	r8, r6, r10\n\t"
        "SADD16	r6, r6, r10\n\t"
#else
        "SBFX	r10, r8, #0, #16\n\t"
        "SBFX	r11, lr, #16, #16\n\t"
        "ASR	r8, r8, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MUL	r8, r11, r8\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r8, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r11, r12, r11, r8\n\t"
        "SUB	r8, r6, r11\n\t"
        "ADD	r6, r6, r11\n\t"
        "SUB	r11, r6, r10, LSR #16\n\t"
        "ADD	r10, r6, r10, LSR #16\n\t"
        "BFI	r8, r11, #0, #16\n\t"
        "BFI	r6, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULTB	r10, lr, r9\n\t"
        "SMULTT	r9, lr, r9\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r9\n\t"
        "SMLABB	r11, r12, r11, r9\n\t"
        "PKHTB	r10, r11, r10, ASR #16\n\t"
        "SSUB16	r9, r7, r10\n\t"
        "SADD16	r7, r7, r10\n\t"
#else
        "SBFX	r10, r9, #0, #16\n\t"
        "SBFX	r11, lr, #16, #16\n\t"
        "ASR	r9, r9, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MUL	r9, r11, r9\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r9, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r11, r12, r11, r9\n\t"
        "SUB	r9, r7, r11\n\t"
        "ADD	r7, r7, r11\n\t"
        "SUB	r11, r7, r10, LSR #16\n\t"
        "ADD	r10, r7, r10, LSR #16\n\t"
        "BFI	r9, r11, #0, #16\n\t"
        "BFI	r7, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
        "STR	r2, [%[r]]\n\t"
        "STR	r3, [%[r], #16]\n\t"
        "STR	r4, [%[r], #32]\n\t"
        "STR	r5, [%[r], #48]\n\t"
        "STR	r6, [%[r], #64]\n\t"
        "STR	r7, [%[r], #80]\n\t"
        "STR	r8, [%[r], #96]\n\t"
        "STR	r9, [%[r], #112]\n\t"
        "LDRD	r2, r3, [sp]\n\t"
        "SUBS	r2, r2, #0x1\n\t"
        "ADD	%[r], %[r], #0x4\n\t"
#if defined(__GNUC__)
        "BNE	L_mlkem_thumb2_ntt_loop_4_i_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BNE.N	L_mlkem_thumb2_ntt_loop_4_i\n\t"
#else
        "BNE.N	L_mlkem_thumb2_ntt_loop_4_i_%=\n\t"
#endif
        "ADD	r3, r3, #0x40\n\t"
        "RSBS	r10, r3, #0x100\n\t"
        "ADD	%[r], %[r], #0x70\n\t"
#if defined(__GNUC__)
        "BNE	L_mlkem_thumb2_ntt_loop_4_j_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BNE.N	L_mlkem_thumb2_ntt_loop_4_j\n\t"
#else
        "BNE.N	L_mlkem_thumb2_ntt_loop_4_j_%=\n\t"
#endif
        "SUB	%[r], %[r], #0x200\n\t"
        "MOV	r3, #0x0\n\t"
        "\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
    "L_mlkem_thumb2_ntt_loop_567:\n\t"
#else
    "L_mlkem_thumb2_ntt_loop_567_%=:\n\t"
#endif
        "ADD	lr, r1, r3, LSR #3\n\t"
        "STR	r3, [sp, #4]\n\t"
        "LDRH	lr, [lr, #32]\n\t"
        "LDR	r2, [%[r]]\n\t"
        "LDR	r3, [%[r], #4]\n\t"
        "LDR	r4, [%[r], #8]\n\t"
        "LDR	r5, [%[r], #12]\n\t"
        "LDR	r6, [%[r], #16]\n\t"
        "LDR	r7, [%[r], #20]\n\t"
        "LDR	r8, [%[r], #24]\n\t"
        "LDR	r9, [%[r], #28]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULBB	r10, lr, r6\n\t"
        "SMULBT	r6, lr, r6\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r6\n\t"
        "SMLABB	r11, r12, r11, r6\n\t"
        "PKHTB	r10, r11, r10, ASR #16\n\t"
        "SSUB16	r6, r2, r10\n\t"
        "SADD16	r2, r2, r10\n\t"
#else
        "SBFX	r10, r6, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r6, r6, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MUL	r6, r11, r6\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r6, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r11, r12, r11, r6\n\t"
        "SUB	r6, r2, r11\n\t"
        "ADD	r2, r2, r11\n\t"
        "SUB	r11, r2, r10, LSR #16\n\t"
        "ADD	r10, r2, r10, LSR #16\n\t"
        "BFI	r6, r11, #0, #16\n\t"
        "BFI	r2, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULBB	r10, lr, r7\n\t"
        "SMULBT	r7, lr, r7\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r7\n\t"
        "SMLABB	r11, r12, r11, r7\n\t"
        "PKHTB	r10, r11, r10, ASR #16\n\t"
        "SSUB16	r7, r3, r10\n\t"
        "SADD16	r3, r3, r10\n\t"
#else
        "SBFX	r10, r7, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r7, r7, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MUL	r7, r11, r7\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r7, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r11, r12, r11, r7\n\t"
        "SUB	r7, r3, r11\n\t"
        "ADD	r3, r3, r11\n\t"
        "SUB	r11, r3, r10, LSR #16\n\t"
        "ADD	r10, r3, r10, LSR #16\n\t"
        "BFI	r7, r11, #0, #16\n\t"
        "BFI	r3, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULBB	r10, lr, r8\n\t"
        "SMULBT	r8, lr, r8\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r8\n\t"
        "SMLABB	r11, r12, r11, r8\n\t"
        "PKHTB	r10, r11, r10, ASR #16\n\t"
        "SSUB16	r8, r4, r10\n\t"
        "SADD16	r4, r4, r10\n\t"
#else
        "SBFX	r10, r8, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r8, r8, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MUL	r8, r11, r8\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r8, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r11, r12, r11, r8\n\t"
        "SUB	r8, r4, r11\n\t"
        "ADD	r4, r4, r11\n\t"
        "SUB	r11, r4, r10, LSR #16\n\t"
        "ADD	r10, r4, r10, LSR #16\n\t"
        "BFI	r8, r11, #0, #16\n\t"
        "BFI	r4, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULBB	r10, lr, r9\n\t"
        "SMULBT	r9, lr, r9\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r9\n\t"
        "SMLABB	r11, r12, r11, r9\n\t"
        "PKHTB	r10, r11, r10, ASR #16\n\t"
        "SSUB16	r9, r5, r10\n\t"
        "SADD16	r5, r5, r10\n\t"
#else
        "SBFX	r10, r9, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r9, r9, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MUL	r9, r11, r9\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r9, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r11, r12, r11, r9\n\t"
        "SUB	r9, r5, r11\n\t"
        "ADD	r5, r5, r11\n\t"
        "SUB	r11, r5, r10, LSR #16\n\t"
        "ADD	r10, r5, r10, LSR #16\n\t"
        "BFI	r9, r11, #0, #16\n\t"
        "BFI	r5, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
        "LDR	lr, [sp, #4]\n\t"
        "ADD	lr, r1, lr, LSR #2\n\t"
        "LDR	lr, [lr, #64]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULBB	r10, lr, r4\n\t"
        "SMULBT	r4, lr, r4\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r4\n\t"
        "SMLABB	r11, r12, r11, r4\n\t"
        "PKHTB	r10, r11, r10, ASR #16\n\t"
        "SSUB16	r4, r2, r10\n\t"
        "SADD16	r2, r2, r10\n\t"
#else
        "SBFX	r10, r4, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r4, r4, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MUL	r4, r11, r4\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r4, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r11, r12, r11, r4\n\t"
        "SUB	r4, r2, r11\n\t"
        "ADD	r2, r2, r11\n\t"
        "SUB	r11, r2, r10, LSR #16\n\t"
        "ADD	r10, r2, r10, LSR #16\n\t"
        "BFI	r4, r11, #0, #16\n\t"
        "BFI	r2, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULBB	r10, lr, r5\n\t"
        "SMULBT	r5, lr, r5\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r5\n\t"
        "SMLABB	r11, r12, r11, r5\n\t"
        "PKHTB	r10, r11, r10, ASR #16\n\t"
        "SSUB16	r5, r3, r10\n\t"
        "SADD16	r3, r3, r10\n\t"
#else
        "SBFX	r10, r5, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r5, r5, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MUL	r5, r11, r5\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r5, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r11, r12, r11, r5\n\t"
        "SUB	r5, r3, r11\n\t"
        "ADD	r3, r3, r11\n\t"
        "SUB	r11, r3, r10, LSR #16\n\t"
        "ADD	r10, r3, r10, LSR #16\n\t"
        "BFI	r5, r11, #0, #16\n\t"
        "BFI	r3, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULTB	r10, lr, r8\n\t"
        "SMULTT	r8, lr, r8\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r8\n\t"
        "SMLABB	r11, r12, r11, r8\n\t"
        "PKHTB	r10, r11, r10, ASR #16\n\t"
        "SSUB16	r8, r6, r10\n\t"
        "SADD16	r6, r6, r10\n\t"
#else
        "SBFX	r10, r8, #0, #16\n\t"
        "SBFX	r11, lr, #16, #16\n\t"
        "ASR	r8, r8, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MUL	r8, r11, r8\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r8, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r11, r12, r11, r8\n\t"
        "SUB	r8, r6, r11\n\t"
        "ADD	r6, r6, r11\n\t"
        "SUB	r11, r6, r10, LSR #16\n\t"
        "ADD	r10, r6, r10, LSR #16\n\t"
        "BFI	r8, r11, #0, #16\n\t"
        "BFI	r6, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULTB	r10, lr, r9\n\t"
        "SMULTT	r9, lr, r9\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r9\n\t"
        "SMLABB	r11, r12, r11, r9\n\t"
        "PKHTB	r10, r11, r10, ASR #16\n\t"
        "SSUB16	r9, r7, r10\n\t"
        "SADD16	r7, r7, r10\n\t"
#else
        "SBFX	r10, r9, #0, #16\n\t"
        "SBFX	r11, lr, #16, #16\n\t"
        "ASR	r9, r9, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MUL	r9, r11, r9\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r9, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r11, r12, r11, r9\n\t"
        "SUB	r9, r7, r11\n\t"
        "ADD	r7, r7, r11\n\t"
        "SUB	r11, r7, r10, LSR #16\n\t"
        "ADD	r10, r7, r10, LSR #16\n\t"
        "BFI	r9, r11, #0, #16\n\t"
        "BFI	r7, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
        "LDR	lr, [sp, #4]\n\t"
        "ADD	lr, r1, lr, LSR #1\n\t"
        "LDR	lr, [lr, #128]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULBB	r10, lr, r3\n\t"
        "SMULBT	r3, lr, r3\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r3\n\t"
        "SMLABB	r11, r12, r11, r3\n\t"
        "PKHTB	r10, r11, r10, ASR #16\n\t"
        "SSUB16	r3, r2, r10\n\t"
        "SADD16	r2, r2, r10\n\t"
#else
        "SBFX	r10, r3, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r3, r3, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MUL	r3, r11, r3\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r3, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r11, r12, r11, r3\n\t"
        "SUB	r3, r2, r11\n\t"
        "ADD	r2, r2, r11\n\t"
        "SUB	r11, r2, r10, LSR #16\n\t"
        "ADD	r10, r2, r10, LSR #16\n\t"
        "BFI	r3, r11, #0, #16\n\t"
        "BFI	r2, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULTB	r10, lr, r5\n\t"
        "SMULTT	r5, lr, r5\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r5\n\t"
        "SMLABB	r11, r12, r11, r5\n\t"
        "PKHTB	r10, r11, r10, ASR #16\n\t"
        "SSUB16	r5, r4, r10\n\t"
        "SADD16	r4, r4, r10\n\t"
#else
        "SBFX	r10, r5, #0, #16\n\t"
        "SBFX	r11, lr, #16, #16\n\t"
        "ASR	r5, r5, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MUL	r5, r11, r5\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r5, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r11, r12, r11, r5\n\t"
        "SUB	r5, r4, r11\n\t"
        "ADD	r4, r4, r11\n\t"
        "SUB	r11, r4, r10, LSR #16\n\t"
        "ADD	r10, r4, r10, LSR #16\n\t"
        "BFI	r5, r11, #0, #16\n\t"
        "BFI	r4, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
        "LDR	lr, [sp, #4]\n\t"
        "ADD	lr, r1, lr, LSR #1\n\t"
        "LDR	lr, [lr, #132]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULBB	r10, lr, r7\n\t"
        "SMULBT	r7, lr, r7\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r7\n\t"
        "SMLABB	r11, r12, r11, r7\n\t"
        "PKHTB	r10, r11, r10, ASR #16\n\t"
        "SSUB16	r7, r6, r10\n\t"
        "SADD16	r6, r6, r10\n\t"
#else
        "SBFX	r10, r7, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r7, r7, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MUL	r7, r11, r7\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r7, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r11, r12, r11, r7\n\t"
        "SUB	r7, r6, r11\n\t"
        "ADD	r6, r6, r11\n\t"
        "SUB	r11, r6, r10, LSR #16\n\t"
        "ADD	r10, r6, r10, LSR #16\n\t"
        "BFI	r7, r11, #0, #16\n\t"
        "BFI	r6, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULTB	r10, lr, r9\n\t"
        "SMULTT	r9, lr, r9\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r9\n\t"
        "SMLABB	r11, r12, r11, r9\n\t"
        "PKHTB	r10, r11, r10, ASR #16\n\t"
        "SSUB16	r9, r8, r10\n\t"
        "SADD16	r8, r8, r10\n\t"
#else
        "SBFX	r10, r9, #0, #16\n\t"
        "SBFX	r11, lr, #16, #16\n\t"
        "ASR	r9, r9, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MUL	r9, r11, r9\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r9, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r11, r12, r11, r9\n\t"
        "SUB	r9, r8, r11\n\t"
        "ADD	r8, r8, r11\n\t"
        "SUB	r11, r8, r10, LSR #16\n\t"
        "ADD	r10, r8, r10, LSR #16\n\t"
        "BFI	r9, r11, #0, #16\n\t"
        "BFI	r8, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "MOV	lr, #0xafc0\n\t"
        "MOVT	lr, #0x13\n\t"
#else
        "MOV	lr, #0x4ebf\n\t"
        "MOV	r12, #0xd01\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULWB	r10, lr, r2\n\t"
        "SMULWT	r11, lr, r2\n\t"
        "SMULBT	r10, r12, r10\n\t"
        "SMULBT	r11, r12, r11\n\t"
        "PKHBT	r10, r10, r11, LSL #16\n\t"
        "SSUB16	r2, r2, r10\n\t"
#else
        "SBFX	r10, r2, #0, #16\n\t"
        "SBFX	r11, r2, #16, #16\n\t"
        "MUL	r10, lr, r10\n\t"
        "MUL	r11, lr, r11\n\t"
        "ASR	r10, r10, #26\n\t"
        "ASR	r11, r11, #26\n\t"
        "MUL	r10, r12, r10\n\t"
        "MUL	r11, r12, r11\n\t"
        "SUB	r11, r2, r11, LSL #16\n\t"
        "SUB	r2, r2, r10\n\t"
        "LSR	r11, r11, #16\n\t"
        "BFI	r2, r11, #16, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULWB	r10, lr, r3\n\t"
        "SMULWT	r11, lr, r3\n\t"
        "SMULBT	r10, r12, r10\n\t"
        "SMULBT	r11, r12, r11\n\t"
        "PKHBT	r10, r10, r11, LSL #16\n\t"
        "SSUB16	r3, r3, r10\n\t"
#else
        "SBFX	r10, r3, #0, #16\n\t"
        "SBFX	r11, r3, #16, #16\n\t"
        "MUL	r10, lr, r10\n\t"
        "MUL	r11, lr, r11\n\t"
        "ASR	r10, r10, #26\n\t"
        "ASR	r11, r11, #26\n\t"
        "MUL	r10, r12, r10\n\t"
        "MUL	r11, r12, r11\n\t"
        "SUB	r11, r3, r11, LSL #16\n\t"
        "SUB	r3, r3, r10\n\t"
        "LSR	r11, r11, #16\n\t"
        "BFI	r3, r11, #16, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULWB	r10, lr, r4\n\t"
        "SMULWT	r11, lr, r4\n\t"
        "SMULBT	r10, r12, r10\n\t"
        "SMULBT	r11, r12, r11\n\t"
        "PKHBT	r10, r10, r11, LSL #16\n\t"
        "SSUB16	r4, r4, r10\n\t"
#else
        "SBFX	r10, r4, #0, #16\n\t"
        "SBFX	r11, r4, #16, #16\n\t"
        "MUL	r10, lr, r10\n\t"
        "MUL	r11, lr, r11\n\t"
        "ASR	r10, r10, #26\n\t"
        "ASR	r11, r11, #26\n\t"
        "MUL	r10, r12, r10\n\t"
        "MUL	r11, r12, r11\n\t"
        "SUB	r11, r4, r11, LSL #16\n\t"
        "SUB	r4, r4, r10\n\t"
        "LSR	r11, r11, #16\n\t"
        "BFI	r4, r11, #16, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULWB	r10, lr, r5\n\t"
        "SMULWT	r11, lr, r5\n\t"
        "SMULBT	r10, r12, r10\n\t"
        "SMULBT	r11, r12, r11\n\t"
        "PKHBT	r10, r10, r11, LSL #16\n\t"
        "SSUB16	r5, r5, r10\n\t"
#else
        "SBFX	r10, r5, #0, #16\n\t"
        "SBFX	r11, r5, #16, #16\n\t"
        "MUL	r10, lr, r10\n\t"
        "MUL	r11, lr, r11\n\t"
        "ASR	r10, r10, #26\n\t"
        "ASR	r11, r11, #26\n\t"
        "MUL	r10, r12, r10\n\t"
        "MUL	r11, r12, r11\n\t"
        "SUB	r11, r5, r11, LSL #16\n\t"
        "SUB	r5, r5, r10\n\t"
        "LSR	r11, r11, #16\n\t"
        "BFI	r5, r11, #16, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULWB	r10, lr, r6\n\t"
        "SMULWT	r11, lr, r6\n\t"
        "SMULBT	r10, r12, r10\n\t"
        "SMULBT	r11, r12, r11\n\t"
        "PKHBT	r10, r10, r11, LSL #16\n\t"
        "SSUB16	r6, r6, r10\n\t"
#else
        "SBFX	r10, r6, #0, #16\n\t"
        "SBFX	r11, r6, #16, #16\n\t"
        "MUL	r10, lr, r10\n\t"
        "MUL	r11, lr, r11\n\t"
        "ASR	r10, r10, #26\n\t"
        "ASR	r11, r11, #26\n\t"
        "MUL	r10, r12, r10\n\t"
        "MUL	r11, r12, r11\n\t"
        "SUB	r11, r6, r11, LSL #16\n\t"
        "SUB	r6, r6, r10\n\t"
        "LSR	r11, r11, #16\n\t"
        "BFI	r6, r11, #16, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULWB	r10, lr, r7\n\t"
        "SMULWT	r11, lr, r7\n\t"
        "SMULBT	r10, r12, r10\n\t"
        "SMULBT	r11, r12, r11\n\t"
        "PKHBT	r10, r10, r11, LSL #16\n\t"
        "SSUB16	r7, r7, r10\n\t"
#else
        "SBFX	r10, r7, #0, #16\n\t"
        "SBFX	r11, r7, #16, #16\n\t"
        "MUL	r10, lr, r10\n\t"
        "MUL	r11, lr, r11\n\t"
        "ASR	r10, r10, #26\n\t"
        "ASR	r11, r11, #26\n\t"
        "MUL	r10, r12, r10\n\t"
        "MUL	r11, r12, r11\n\t"
        "SUB	r11, r7, r11, LSL #16\n\t"
        "SUB	r7, r7, r10\n\t"
        "LSR	r11, r11, #16\n\t"
        "BFI	r7, r11, #16, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULWB	r10, lr, r8\n\t"
        "SMULWT	r11, lr, r8\n\t"
        "SMULBT	r10, r12, r10\n\t"
        "SMULBT	r11, r12, r11\n\t"
        "PKHBT	r10, r10, r11, LSL #16\n\t"
        "SSUB16	r8, r8, r10\n\t"
#else
        "SBFX	r10, r8, #0, #16\n\t"
        "SBFX	r11, r8, #16, #16\n\t"
        "MUL	r10, lr, r10\n\t"
        "MUL	r11, lr, r11\n\t"
        "ASR	r10, r10, #26\n\t"
        "ASR	r11, r11, #26\n\t"
        "MUL	r10, r12, r10\n\t"
        "MUL	r11, r12, r11\n\t"
        "SUB	r11, r8, r11, LSL #16\n\t"
        "SUB	r8, r8, r10\n\t"
        "LSR	r11, r11, #16\n\t"
        "BFI	r8, r11, #16, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULWB	r10, lr, r9\n\t"
        "SMULWT	r11, lr, r9\n\t"
        "SMULBT	r10, r12, r10\n\t"
        "SMULBT	r11, r12, r11\n\t"
        "PKHBT	r10, r10, r11, LSL #16\n\t"
        "SSUB16	r9, r9, r10\n\t"
#else
        "SBFX	r10, r9, #0, #16\n\t"
        "SBFX	r11, r9, #16, #16\n\t"
        "MUL	r10, lr, r10\n\t"
        "MUL	r11, lr, r11\n\t"
        "ASR	r10, r10, #26\n\t"
        "ASR	r11, r11, #26\n\t"
        "MUL	r10, r12, r10\n\t"
        "MUL	r11, r12, r11\n\t"
        "SUB	r11, r9, r11, LSL #16\n\t"
        "SUB	r9, r9, r10\n\t"
        "LSR	r11, r11, #16\n\t"
        "BFI	r9, r11, #16, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "MOV	r12, #0xd01\n\t"
        "MOVT	r12, #0xcff\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
        "STR	r2, [%[r]]\n\t"
        "STR	r3, [%[r], #4]\n\t"
        "STR	r4, [%[r], #8]\n\t"
        "STR	r5, [%[r], #12]\n\t"
        "STR	r6, [%[r], #16]\n\t"
        "STR	r7, [%[r], #20]\n\t"
        "STR	r8, [%[r], #24]\n\t"
        "STR	r9, [%[r], #28]\n\t"
        "LDR	r3, [sp, #4]\n\t"
        "ADD	r3, r3, #0x10\n\t"
        "RSBS	r10, r3, #0x100\n\t"
        "ADD	%[r], %[r], #0x20\n\t"
#if defined(__GNUC__)
        "BNE	L_mlkem_thumb2_ntt_loop_567_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BNE.N	L_mlkem_thumb2_ntt_loop_567\n\t"
#else
        "BNE.N	L_mlkem_thumb2_ntt_loop_567_%=\n\t"
#endif
        "ADD	sp, sp, #0x8\n\t"
        : [r] "+r" (r),
          [L_mlkem_thumb2_ntt_zetas] "+r" (L_mlkem_thumb2_ntt_zetas_c)
        :
        : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
            "r11", "r12", "lr"
    );
}

/* Constant table read by mlkem_thumb2_invntt().
 *
 * Holds 128 16-bit entries (16 rows of 8), every one of them smaller than
 * 0xd01. mlkem_thumb2_invntt() takes the address of this table, keeps it in
 * r1 and fetches from it with 32-bit LDR instructions, so each load brings in
 * two neighbouring entries; the bottom and top halfwords of the loaded word
 * are then used as separate 16-bit multipliers in the butterflies.
 *
 * NOTE(review): going by the name these are presumably the zeta (twiddle)
 * values for the inverse NTT, pre-scaled for the reduction the assembly
 * applies after each multiply - confirm against the generator script before
 * relying on that.
 * NOTE(review): XALIGNED(16) is presumably a 16-byte alignment request; the
 * macro is defined outside this file.
 *
 * The entry order is significant because the assembly addresses the table
 * with fixed byte offsets. Do not reorder or reformat individual values by
 * hand; per the file header this source is generated.
 */
XALIGNED(16) static const word16 L_mlkem_invntt_zetas_inv[] = {
    0x06a5, 0x070f, 0x05b4, 0x0943, 0x0922, 0x091d, 0x0134, 0x006c,
    0x0b23, 0x0366, 0x0356, 0x05e6, 0x09e7, 0x04fe, 0x05fa, 0x04a1,
    0x067b, 0x04a3, 0x0c25, 0x036a, 0x0537, 0x083f, 0x0088, 0x04bf,
    0x0b81, 0x05b9, 0x0505, 0x07d7, 0x0a9f, 0x0aa6, 0x08b8, 0x09d0,
    0x004b, 0x009c, 0x0bb8, 0x0b5f, 0x0ba4, 0x0368, 0x0a7d, 0x0636,
    0x08a2, 0x025a, 0x0736, 0x0309, 0x0093, 0x087a, 0x09f7, 0x00f6,
    0x068c, 0x06db, 0x01cc, 0x0123, 0x00eb, 0x0c50, 0x0ab6, 0x0b5b,
    0x0c98, 0x06f3, 0x099a, 0x04e3, 0x09b6, 0x0ad6, 0x0b53, 0x044f,
    0x04fb, 0x0a5c, 0x0429, 0x0b41, 0x02d5, 0x05e4, 0x0940, 0x018e,
    0x03b7, 0x00f7, 0x058d, 0x0c96, 0x09c3, 0x010f, 0x005a, 0x0355,
    0x0744, 0x0c83, 0x048a, 0x0652, 0x029a, 0x0140, 0x0008, 0x0afd,
    0x0608, 0x011a, 0x072e, 0x050d, 0x090a, 0x0228, 0x0a75, 0x083a,
    0x0623, 0x00cd, 0x0b66, 0x0606, 0x0aa1, 0x0a25, 0x0908, 0x02a9,
    0x0082, 0x0642, 0x074f, 0x033d, 0x0b82, 0x0bf9, 0x052d, 0x0ac4,
    0x0745, 0x05c2, 0x04b2, 0x093f, 0x0c4b, 0x06d8, 0x0a93, 0x00ab,
    0x0c37, 0x0be2, 0x0773, 0x072c, 0x05ed, 0x0167, 0x02f6, 0x05a1,
};

#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void mlkem_thumb2_invntt(sword16* r_p)
#else
void mlkem_thumb2_invntt(sword16* r)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register sword16* r __asm__ ("r0") = (sword16*)r_p;
    register word16* L_mlkem_invntt_zetas_inv_c __asm__ ("r1") =
        (word16*)&L_mlkem_invntt_zetas_inv;

#else
    register word16* L_mlkem_invntt_zetas_inv_c =
        (word16*)&L_mlkem_invntt_zetas_inv;

#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        "SUB	sp, sp, #0x8\n\t"
        "MOV	r1, %[L_mlkem_invntt_zetas_inv]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
        "MOV	r12, #0xd01\n\t"
        "MOVT	r12, #0xcff\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
        "MOV	r3, #0x0\n\t"
        "\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
    "L_mlkem_invntt_loop_765:\n\t"
#else
    "L_mlkem_invntt_loop_765_%=:\n\t"
#endif
        "ADD	lr, r1, r3, LSR #1\n\t"
        "STR	r3, [sp, #4]\n\t"
        "LDR	r2, [%[r]]\n\t"
        "LDR	r3, [%[r], #4]\n\t"
        "LDR	r4, [%[r], #8]\n\t"
        "LDR	r5, [%[r], #12]\n\t"
        "LDR	r6, [%[r], #16]\n\t"
        "LDR	r7, [%[r], #20]\n\t"
        "LDR	r8, [%[r], #24]\n\t"
        "LDR	r9, [%[r], #28]\n\t"
        "LDR	lr, [lr]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
        "SSUB16	r10, r2, r3\n\t"
        "SADD16	r2, r2, r3\n\t"
        "SMULBT	r3, lr, r10\n\t"
        "SMULBB	r10, lr, r10\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r3\n\t"
        "SMLABB	r3, r12, r11, r3\n\t"
        "PKHTB	r3, r3, r10, ASR #16\n\t"
#else
        "SUB	r11, r2, r3\n\t"
        "ADD	r12, r2, r3\n\t"
        "BFC	r3, #0, #16\n\t"
        "BFC	r2, #0, #16\n\t"
        "SUB	r10, r2, r3\n\t"
        "ADD	r2, r2, r3\n\t"
        "BFI	r10, r11, #0, #16\n\t"
        "BFI	r2, r12, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r12, r10, #16\n\t"
        "MUL	r3, r11, r12\n\t"
        "SBFX	r10, r10, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r3, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r3, r12, r11, r3\n\t"
        "BFI	r3, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SSUB16	r10, r4, r5\n\t"
        "SADD16	r4, r4, r5\n\t"
        "SMULTT	r5, lr, r10\n\t"
        "SMULTB	r10, lr, r10\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r5\n\t"
        "SMLABB	r5, r12, r11, r5\n\t"
        "PKHTB	r5, r5, r10, ASR #16\n\t"
#else
        "SUB	r11, r4, r5\n\t"
        "ADD	r12, r4, r5\n\t"
        "BFC	r5, #0, #16\n\t"
        "BFC	r4, #0, #16\n\t"
        "SUB	r10, r4, r5\n\t"
        "ADD	r4, r4, r5\n\t"
        "BFI	r10, r11, #0, #16\n\t"
        "BFI	r4, r12, #0, #16\n\t"
        "SBFX	r11, lr, #16, #16\n\t"
        "ASR	r12, r10, #16\n\t"
        "MUL	r5, r11, r12\n\t"
        "SBFX	r10, r10, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r5, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r5, r12, r11, r5\n\t"
        "BFI	r5, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
        "LDR	lr, [sp, #4]\n\t"
        "ADD	lr, r1, lr, LSR #1\n\t"
        "LDR	lr, [lr, #4]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
        "SSUB16	r10, r6, r7\n\t"
        "SADD16	r6, r6, r7\n\t"
        "SMULBT	r7, lr, r10\n\t"
        "SMULBB	r10, lr, r10\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r7\n\t"
        "SMLABB	r7, r12, r11, r7\n\t"
        "PKHTB	r7, r7, r10, ASR #16\n\t"
#else
        "SUB	r11, r6, r7\n\t"
        "ADD	r12, r6, r7\n\t"
        "BFC	r7, #0, #16\n\t"
        "BFC	r6, #0, #16\n\t"
        "SUB	r10, r6, r7\n\t"
        "ADD	r6, r6, r7\n\t"
        "BFI	r10, r11, #0, #16\n\t"
        "BFI	r6, r12, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r12, r10, #16\n\t"
        "MUL	r7, r11, r12\n\t"
        "SBFX	r10, r10, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r7, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r7, r12, r11, r7\n\t"
        "BFI	r7, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SSUB16	r10, r8, r9\n\t"
        "SADD16	r8, r8, r9\n\t"
        "SMULTT	r9, lr, r10\n\t"
        "SMULTB	r10, lr, r10\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r9\n\t"
        "SMLABB	r9, r12, r11, r9\n\t"
        "PKHTB	r9, r9, r10, ASR #16\n\t"
#else
        "SUB	r11, r8, r9\n\t"
        "ADD	r12, r8, r9\n\t"
        "BFC	r9, #0, #16\n\t"
        "BFC	r8, #0, #16\n\t"
        "SUB	r10, r8, r9\n\t"
        "ADD	r8, r8, r9\n\t"
        "BFI	r10, r11, #0, #16\n\t"
        "BFI	r8, r12, #0, #16\n\t"
        "SBFX	r11, lr, #16, #16\n\t"
        "ASR	r12, r10, #16\n\t"
        "MUL	r9, r11, r12\n\t"
        "SBFX	r10, r10, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r9, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r9, r12, r11, r9\n\t"
        "BFI	r9, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
        "LDR	lr, [sp, #4]\n\t"
        "ADD	lr, r1, lr, LSR #2\n\t"
        "LDR	lr, [lr, #128]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
        "SSUB16	r10, r2, r4\n\t"
        "SADD16	r2, r2, r4\n\t"
        "SMULBT	r4, lr, r10\n\t"
        "SMULBB	r10, lr, r10\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r4\n\t"
        "SMLABB	r4, r12, r11, r4\n\t"
        "PKHTB	r4, r4, r10, ASR #16\n\t"
#else
        "SUB	r11, r2, r4\n\t"
        "ADD	r12, r2, r4\n\t"
        "BFC	r4, #0, #16\n\t"
        "BFC	r2, #0, #16\n\t"
        "SUB	r10, r2, r4\n\t"
        "ADD	r2, r2, r4\n\t"
        "BFI	r10, r11, #0, #16\n\t"
        "BFI	r2, r12, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r12, r10, #16\n\t"
        "MUL	r4, r11, r12\n\t"
        "SBFX	r10, r10, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r4, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r4, r12, r11, r4\n\t"
        "BFI	r4, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SSUB16	r10, r3, r5\n\t"
        "SADD16	r3, r3, r5\n\t"
        "SMULBT	r5, lr, r10\n\t"
        "SMULBB	r10, lr, r10\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r5\n\t"
        "SMLABB	r5, r12, r11, r5\n\t"
        "PKHTB	r5, r5, r10, ASR #16\n\t"
#else
        "SUB	r11, r3, r5\n\t"
        "ADD	r12, r3, r5\n\t"
        "BFC	r5, #0, #16\n\t"
        "BFC	r3, #0, #16\n\t"
        "SUB	r10, r3, r5\n\t"
        "ADD	r3, r3, r5\n\t"
        "BFI	r10, r11, #0, #16\n\t"
        "BFI	r3, r12, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r12, r10, #16\n\t"
        "MUL	r5, r11, r12\n\t"
        "SBFX	r10, r10, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r5, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r5, r12, r11, r5\n\t"
        "BFI	r5, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SSUB16	r10, r6, r8\n\t"
        "SADD16	r6, r6, r8\n\t"
        "SMULTT	r8, lr, r10\n\t"
        "SMULTB	r10, lr, r10\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r8\n\t"
        "SMLABB	r8, r12, r11, r8\n\t"
        "PKHTB	r8, r8, r10, ASR #16\n\t"
#else
        "SUB	r11, r6, r8\n\t"
        "ADD	r12, r6, r8\n\t"
        "BFC	r8, #0, #16\n\t"
        "BFC	r6, #0, #16\n\t"
        "SUB	r10, r6, r8\n\t"
        "ADD	r6, r6, r8\n\t"
        "BFI	r10, r11, #0, #16\n\t"
        "BFI	r6, r12, #0, #16\n\t"
        "SBFX	r11, lr, #16, #16\n\t"
        "ASR	r12, r10, #16\n\t"
        "MUL	r8, r11, r12\n\t"
        "SBFX	r10, r10, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r8, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r8, r12, r11, r8\n\t"
        "BFI	r8, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SSUB16	r10, r7, r9\n\t"
        "SADD16	r7, r7, r9\n\t"
        "SMULTT	r9, lr, r10\n\t"
        "SMULTB	r10, lr, r10\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r9\n\t"
        "SMLABB	r9, r12, r11, r9\n\t"
        "PKHTB	r9, r9, r10, ASR #16\n\t"
#else
        "SUB	r11, r7, r9\n\t"
        "ADD	r12, r7, r9\n\t"
        "BFC	r9, #0, #16\n\t"
        "BFC	r7, #0, #16\n\t"
        "SUB	r10, r7, r9\n\t"
        "ADD	r7, r7, r9\n\t"
        "BFI	r10, r11, #0, #16\n\t"
        "BFI	r7, r12, #0, #16\n\t"
        "SBFX	r11, lr, #16, #16\n\t"
        "ASR	r12, r10, #16\n\t"
        "MUL	r9, r11, r12\n\t"
        "SBFX	r10, r10, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r9, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r9, r12, r11, r9\n\t"
        "BFI	r9, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
        "LDR	lr, [sp, #4]\n\t"
        "ADD	lr, r1, lr, LSR #3\n\t"
        "LDR	lr, [lr, #192]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
        "SSUB16	r10, r2, r6\n\t"
        "SADD16	r2, r2, r6\n\t"
        "SMULBT	r6, lr, r10\n\t"
        "SMULBB	r10, lr, r10\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r6\n\t"
        "SMLABB	r6, r12, r11, r6\n\t"
        "PKHTB	r6, r6, r10, ASR #16\n\t"
#else
        "SUB	r11, r2, r6\n\t"
        "ADD	r12, r2, r6\n\t"
        "BFC	r6, #0, #16\n\t"
        "BFC	r2, #0, #16\n\t"
        "SUB	r10, r2, r6\n\t"
        "ADD	r2, r2, r6\n\t"
        "BFI	r10, r11, #0, #16\n\t"
        "BFI	r2, r12, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r12, r10, #16\n\t"
        "MUL	r6, r11, r12\n\t"
        "SBFX	r10, r10, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r6, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r6, r12, r11, r6\n\t"
        "BFI	r6, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SSUB16	r10, r3, r7\n\t"
        "SADD16	r3, r3, r7\n\t"
        "SMULBT	r7, lr, r10\n\t"
        "SMULBB	r10, lr, r10\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r7\n\t"
        "SMLABB	r7, r12, r11, r7\n\t"
        "PKHTB	r7, r7, r10, ASR #16\n\t"
#else
        "SUB	r11, r3, r7\n\t"
        "ADD	r12, r3, r7\n\t"
        "BFC	r7, #0, #16\n\t"
        "BFC	r3, #0, #16\n\t"
        "SUB	r10, r3, r7\n\t"
        "ADD	r3, r3, r7\n\t"
        "BFI	r10, r11, #0, #16\n\t"
        "BFI	r3, r12, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r12, r10, #16\n\t"
        "MUL	r7, r11, r12\n\t"
        "SBFX	r10, r10, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r7, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r7, r12, r11, r7\n\t"
        "BFI	r7, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SSUB16	r10, r4, r8\n\t"
        "SADD16	r4, r4, r8\n\t"
        "SMULBT	r8, lr, r10\n\t"
        "SMULBB	r10, lr, r10\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r8\n\t"
        "SMLABB	r8, r12, r11, r8\n\t"
        "PKHTB	r8, r8, r10, ASR #16\n\t"
#else
        "SUB	r11, r4, r8\n\t"
        "ADD	r12, r4, r8\n\t"
        "BFC	r8, #0, #16\n\t"
        "BFC	r4, #0, #16\n\t"
        "SUB	r10, r4, r8\n\t"
        "ADD	r4, r4, r8\n\t"
        "BFI	r10, r11, #0, #16\n\t"
        "BFI	r4, r12, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r12, r10, #16\n\t"
        "MUL	r8, r11, r12\n\t"
        "SBFX	r10, r10, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r8, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r8, r12, r11, r8\n\t"
        "BFI	r8, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SSUB16	r10, r5, r9\n\t"
        "SADD16	r5, r5, r9\n\t"
        "SMULBT	r9, lr, r10\n\t"
        "SMULBB	r10, lr, r10\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r9\n\t"
        "SMLABB	r9, r12, r11, r9\n\t"
        "PKHTB	r9, r9, r10, ASR #16\n\t"
#else
        "SUB	r11, r5, r9\n\t"
        "ADD	r12, r5, r9\n\t"
        "BFC	r9, #0, #16\n\t"
        "BFC	r5, #0, #16\n\t"
        "SUB	r10, r5, r9\n\t"
        "ADD	r5, r5, r9\n\t"
        "BFI	r10, r11, #0, #16\n\t"
        "BFI	r5, r12, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r12, r10, #16\n\t"
        "MUL	r9, r11, r12\n\t"
        "SBFX	r10, r10, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r9, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r9, r12, r11, r9\n\t"
        "BFI	r9, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "MOV	lr, #0xafc0\n\t"
        "MOVT	lr, #0x13\n\t"
#else
        "MOV	lr, #0x4ebf\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULWB	r10, lr, r2\n\t"
        "SMULWT	r11, lr, r2\n\t"
        "SMULBT	r10, r12, r10\n\t"
        "SMULBT	r11, r12, r11\n\t"
        "PKHBT	r10, r10, r11, LSL #16\n\t"
        "SSUB16	r2, r2, r10\n\t"
#else
        "SBFX	r10, r2, #0, #16\n\t"
        "SBFX	r11, r2, #16, #16\n\t"
        "MUL	r10, lr, r10\n\t"
        "MUL	r11, lr, r11\n\t"
        "ASR	r10, r10, #26\n\t"
        "ASR	r11, r11, #26\n\t"
        "MUL	r10, r12, r10\n\t"
        "MUL	r11, r12, r11\n\t"
        "SUB	r11, r2, r11, LSL #16\n\t"
        "SUB	r2, r2, r10\n\t"
        "LSR	r11, r11, #16\n\t"
        "BFI	r2, r11, #16, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULWB	r10, lr, r3\n\t"
        "SMULWT	r11, lr, r3\n\t"
        "SMULBT	r10, r12, r10\n\t"
        "SMULBT	r11, r12, r11\n\t"
        "PKHBT	r10, r10, r11, LSL #16\n\t"
        "SSUB16	r3, r3, r10\n\t"
#else
        "SBFX	r10, r3, #0, #16\n\t"
        "SBFX	r11, r3, #16, #16\n\t"
        "MUL	r10, lr, r10\n\t"
        "MUL	r11, lr, r11\n\t"
        "ASR	r10, r10, #26\n\t"
        "ASR	r11, r11, #26\n\t"
        "MUL	r10, r12, r10\n\t"
        "MUL	r11, r12, r11\n\t"
        "SUB	r11, r3, r11, LSL #16\n\t"
        "SUB	r3, r3, r10\n\t"
        "LSR	r11, r11, #16\n\t"
        "BFI	r3, r11, #16, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULWB	r10, lr, r4\n\t"
        "SMULWT	r11, lr, r4\n\t"
        "SMULBT	r10, r12, r10\n\t"
        "SMULBT	r11, r12, r11\n\t"
        "PKHBT	r10, r10, r11, LSL #16\n\t"
        "SSUB16	r4, r4, r10\n\t"
#else
        "SBFX	r10, r4, #0, #16\n\t"
        "SBFX	r11, r4, #16, #16\n\t"
        "MUL	r10, lr, r10\n\t"
        "MUL	r11, lr, r11\n\t"
        "ASR	r10, r10, #26\n\t"
        "ASR	r11, r11, #26\n\t"
        "MUL	r10, r12, r10\n\t"
        "MUL	r11, r12, r11\n\t"
        "SUB	r11, r4, r11, LSL #16\n\t"
        "SUB	r4, r4, r10\n\t"
        "LSR	r11, r11, #16\n\t"
        "BFI	r4, r11, #16, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULWB	r10, lr, r5\n\t"
        "SMULWT	r11, lr, r5\n\t"
        "SMULBT	r10, r12, r10\n\t"
        "SMULBT	r11, r12, r11\n\t"
        "PKHBT	r10, r10, r11, LSL #16\n\t"
        "SSUB16	r5, r5, r10\n\t"
#else
        "SBFX	r10, r5, #0, #16\n\t"
        "SBFX	r11, r5, #16, #16\n\t"
        "MUL	r10, lr, r10\n\t"
        "MUL	r11, lr, r11\n\t"
        "ASR	r10, r10, #26\n\t"
        "ASR	r11, r11, #26\n\t"
        "MUL	r10, r12, r10\n\t"
        "MUL	r11, r12, r11\n\t"
        "SUB	r11, r5, r11, LSL #16\n\t"
        "SUB	r5, r5, r10\n\t"
        "LSR	r11, r11, #16\n\t"
        "BFI	r5, r11, #16, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
        "STR	r2, [%[r]]\n\t"
        "STR	r3, [%[r], #4]\n\t"
        "STR	r4, [%[r], #8]\n\t"
        "STR	r5, [%[r], #12]\n\t"
        "STR	r6, [%[r], #16]\n\t"
        "STR	r7, [%[r], #20]\n\t"
        "STR	r8, [%[r], #24]\n\t"
        "STR	r9, [%[r], #28]\n\t"
        "LDR	r3, [sp, #4]\n\t"
        "ADD	r3, r3, #0x10\n\t"
        "RSBS	r10, r3, #0x100\n\t"
        "ADD	%[r], %[r], #0x20\n\t"
#if defined(__GNUC__)
        "BNE	L_mlkem_invntt_loop_765_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BNE.N	L_mlkem_invntt_loop_765\n\t"
#else
        "BNE.N	L_mlkem_invntt_loop_765_%=\n\t"
#endif
        "SUB	%[r], %[r], #0x200\n\t"
        "MOV	r3, #0x0\n\t"
        "\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
    "L_mlkem_invntt_loop_4_j:\n\t"
#else
    "L_mlkem_invntt_loop_4_j_%=:\n\t"
#endif
        "STR	r3, [sp, #4]\n\t"
        "ADD	lr, r1, r3, LSR #4\n\t"
        "MOV	r2, #0x4\n\t"
        "LDR	lr, [lr, #224]\n\t"
        "\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
    "L_mlkem_invntt_loop_4_i:\n\t"
#else
    "L_mlkem_invntt_loop_4_i_%=:\n\t"
#endif
        "STR	r2, [sp]\n\t"
        "LDR	r2, [%[r]]\n\t"
        "LDR	r3, [%[r], #16]\n\t"
        "LDR	r4, [%[r], #32]\n\t"
        "LDR	r5, [%[r], #48]\n\t"
        "LDR	r6, [%[r], #64]\n\t"
        "LDR	r7, [%[r], #80]\n\t"
        "LDR	r8, [%[r], #96]\n\t"
        "LDR	r9, [%[r], #112]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
        "SSUB16	r10, r2, r4\n\t"
        "SADD16	r2, r2, r4\n\t"
        "SMULBT	r4, lr, r10\n\t"
        "SMULBB	r10, lr, r10\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r4\n\t"
        "SMLABB	r4, r12, r11, r4\n\t"
        "PKHTB	r4, r4, r10, ASR #16\n\t"
#else
        "SUB	r11, r2, r4\n\t"
        "ADD	r12, r2, r4\n\t"
        "BFC	r4, #0, #16\n\t"
        "BFC	r2, #0, #16\n\t"
        "SUB	r10, r2, r4\n\t"
        "ADD	r2, r2, r4\n\t"
        "BFI	r10, r11, #0, #16\n\t"
        "BFI	r2, r12, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r12, r10, #16\n\t"
        "MUL	r4, r11, r12\n\t"
        "SBFX	r10, r10, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r4, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r4, r12, r11, r4\n\t"
        "BFI	r4, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SSUB16	r10, r3, r5\n\t"
        "SADD16	r3, r3, r5\n\t"
        "SMULBT	r5, lr, r10\n\t"
        "SMULBB	r10, lr, r10\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r5\n\t"
        "SMLABB	r5, r12, r11, r5\n\t"
        "PKHTB	r5, r5, r10, ASR #16\n\t"
#else
        "SUB	r11, r3, r5\n\t"
        "ADD	r12, r3, r5\n\t"
        "BFC	r5, #0, #16\n\t"
        "BFC	r3, #0, #16\n\t"
        "SUB	r10, r3, r5\n\t"
        "ADD	r3, r3, r5\n\t"
        "BFI	r10, r11, #0, #16\n\t"
        "BFI	r3, r12, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r12, r10, #16\n\t"
        "MUL	r5, r11, r12\n\t"
        "SBFX	r10, r10, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r5, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r5, r12, r11, r5\n\t"
        "BFI	r5, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SSUB16	r10, r6, r8\n\t"
        "SADD16	r6, r6, r8\n\t"
        "SMULTT	r8, lr, r10\n\t"
        "SMULTB	r10, lr, r10\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r8\n\t"
        "SMLABB	r8, r12, r11, r8\n\t"
        "PKHTB	r8, r8, r10, ASR #16\n\t"
#else
        "SUB	r11, r6, r8\n\t"
        "ADD	r12, r6, r8\n\t"
        "BFC	r8, #0, #16\n\t"
        "BFC	r6, #0, #16\n\t"
        "SUB	r10, r6, r8\n\t"
        "ADD	r6, r6, r8\n\t"
        "BFI	r10, r11, #0, #16\n\t"
        "BFI	r6, r12, #0, #16\n\t"
        "SBFX	r11, lr, #16, #16\n\t"
        "ASR	r12, r10, #16\n\t"
        "MUL	r8, r11, r12\n\t"
        "SBFX	r10, r10, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r8, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r8, r12, r11, r8\n\t"
        "BFI	r8, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SSUB16	r10, r7, r9\n\t"
        "SADD16	r7, r7, r9\n\t"
        "SMULTT	r9, lr, r10\n\t"
        "SMULTB	r10, lr, r10\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r9\n\t"
        "SMLABB	r9, r12, r11, r9\n\t"
        "PKHTB	r9, r9, r10, ASR #16\n\t"
#else
        "SUB	r11, r7, r9\n\t"
        "ADD	r12, r7, r9\n\t"
        "BFC	r9, #0, #16\n\t"
        "BFC	r7, #0, #16\n\t"
        "SUB	r10, r7, r9\n\t"
        "ADD	r7, r7, r9\n\t"
        "BFI	r10, r11, #0, #16\n\t"
        "BFI	r7, r12, #0, #16\n\t"
        "SBFX	r11, lr, #16, #16\n\t"
        "ASR	r12, r10, #16\n\t"
        "MUL	r9, r11, r12\n\t"
        "SBFX	r10, r10, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r9, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r9, r12, r11, r9\n\t"
        "BFI	r9, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
        "STR	r2, [%[r]]\n\t"
        "STR	r3, [%[r], #16]\n\t"
        "STR	r4, [%[r], #32]\n\t"
        "STR	r5, [%[r], #48]\n\t"
        "STR	r6, [%[r], #64]\n\t"
        "STR	r7, [%[r], #80]\n\t"
        "STR	r8, [%[r], #96]\n\t"
        "STR	r9, [%[r], #112]\n\t"
        "LDRD	r2, r3, [sp]\n\t"
        "SUBS	r2, r2, #0x1\n\t"
        "ADD	%[r], %[r], #0x4\n\t"
#if defined(__GNUC__)
        "BNE	L_mlkem_invntt_loop_4_i_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BNE.N	L_mlkem_invntt_loop_4_i\n\t"
#else
        "BNE.N	L_mlkem_invntt_loop_4_i_%=\n\t"
#endif
        "ADD	r3, r3, #0x40\n\t"
        "RSBS	r10, r3, #0x100\n\t"
        "ADD	%[r], %[r], #0x70\n\t"
#if defined(__GNUC__)
        "BNE	L_mlkem_invntt_loop_4_j_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BNE.N	L_mlkem_invntt_loop_4_j\n\t"
#else
        "BNE.N	L_mlkem_invntt_loop_4_j_%=\n\t"
#endif
        "SUB	%[r], %[r], #0x200\n\t"
        "MOV	r2, #0x10\n\t"
        "\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
    "L_mlkem_invntt_loop_321:\n\t"
#else
    "L_mlkem_invntt_loop_321_%=:\n\t"
#endif
        "STR	r2, [sp]\n\t"
        "LDRH	lr, [r1, #2]\n\t"
        "LDR	r2, [%[r]]\n\t"
        "LDR	r3, [%[r], #64]\n\t"
        "LDR	r4, [%[r], #128]\n\t"
        "LDR	r5, [%[r], #192]\n\t"
        "LDR	r6, [%[r], #256]\n\t"
        "LDR	r7, [%[r], #320]\n\t"
        "LDR	r8, [%[r], #384]\n\t"
        "LDR	r9, [%[r], #448]\n\t"
        "LDR	lr, [r1, #240]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
        "SSUB16	r10, r2, r3\n\t"
        "SADD16	r2, r2, r3\n\t"
        "SMULBT	r3, lr, r10\n\t"
        "SMULBB	r10, lr, r10\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r3\n\t"
        "SMLABB	r3, r12, r11, r3\n\t"
        "PKHTB	r3, r3, r10, ASR #16\n\t"
#else
        "SUB	r11, r2, r3\n\t"
        "ADD	r12, r2, r3\n\t"
        "BFC	r3, #0, #16\n\t"
        "BFC	r2, #0, #16\n\t"
        "SUB	r10, r2, r3\n\t"
        "ADD	r2, r2, r3\n\t"
        "BFI	r10, r11, #0, #16\n\t"
        "BFI	r2, r12, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r12, r10, #16\n\t"
        "MUL	r3, r11, r12\n\t"
        "SBFX	r10, r10, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r3, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r3, r12, r11, r3\n\t"
        "BFI	r3, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SSUB16	r10, r4, r5\n\t"
        "SADD16	r4, r4, r5\n\t"
        "SMULTT	r5, lr, r10\n\t"
        "SMULTB	r10, lr, r10\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r5\n\t"
        "SMLABB	r5, r12, r11, r5\n\t"
        "PKHTB	r5, r5, r10, ASR #16\n\t"
#else
        "SUB	r11, r4, r5\n\t"
        "ADD	r12, r4, r5\n\t"
        "BFC	r5, #0, #16\n\t"
        "BFC	r4, #0, #16\n\t"
        "SUB	r10, r4, r5\n\t"
        "ADD	r4, r4, r5\n\t"
        "BFI	r10, r11, #0, #16\n\t"
        "BFI	r4, r12, #0, #16\n\t"
        "SBFX	r11, lr, #16, #16\n\t"
        "ASR	r12, r10, #16\n\t"
        "MUL	r5, r11, r12\n\t"
        "SBFX	r10, r10, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r5, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r5, r12, r11, r5\n\t"
        "BFI	r5, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
        "LDR	lr, [r1, #244]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
        "SSUB16	r10, r6, r7\n\t"
        "SADD16	r6, r6, r7\n\t"
        "SMULBT	r7, lr, r10\n\t"
        "SMULBB	r10, lr, r10\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r7\n\t"
        "SMLABB	r7, r12, r11, r7\n\t"
        "PKHTB	r7, r7, r10, ASR #16\n\t"
#else
        "SUB	r11, r6, r7\n\t"
        "ADD	r12, r6, r7\n\t"
        "BFC	r7, #0, #16\n\t"
        "BFC	r6, #0, #16\n\t"
        "SUB	r10, r6, r7\n\t"
        "ADD	r6, r6, r7\n\t"
        "BFI	r10, r11, #0, #16\n\t"
        "BFI	r6, r12, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r12, r10, #16\n\t"
        "MUL	r7, r11, r12\n\t"
        "SBFX	r10, r10, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r7, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r7, r12, r11, r7\n\t"
        "BFI	r7, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SSUB16	r10, r8, r9\n\t"
        "SADD16	r8, r8, r9\n\t"
        "SMULTT	r9, lr, r10\n\t"
        "SMULTB	r10, lr, r10\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r9\n\t"
        "SMLABB	r9, r12, r11, r9\n\t"
        "PKHTB	r9, r9, r10, ASR #16\n\t"
#else
        "SUB	r11, r8, r9\n\t"
        "ADD	r12, r8, r9\n\t"
        "BFC	r9, #0, #16\n\t"
        "BFC	r8, #0, #16\n\t"
        "SUB	r10, r8, r9\n\t"
        "ADD	r8, r8, r9\n\t"
        "BFI	r10, r11, #0, #16\n\t"
        "BFI	r8, r12, #0, #16\n\t"
        "SBFX	r11, lr, #16, #16\n\t"
        "ASR	r12, r10, #16\n\t"
        "MUL	r9, r11, r12\n\t"
        "SBFX	r10, r10, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r9, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r9, r12, r11, r9\n\t"
        "BFI	r9, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
        "LDR	lr, [r1, #248]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
        "SSUB16	r10, r2, r4\n\t"
        "SADD16	r2, r2, r4\n\t"
        "SMULBT	r4, lr, r10\n\t"
        "SMULBB	r10, lr, r10\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r4\n\t"
        "SMLABB	r4, r12, r11, r4\n\t"
        "PKHTB	r4, r4, r10, ASR #16\n\t"
#else
        "SUB	r11, r2, r4\n\t"
        "ADD	r12, r2, r4\n\t"
        "BFC	r4, #0, #16\n\t"
        "BFC	r2, #0, #16\n\t"
        "SUB	r10, r2, r4\n\t"
        "ADD	r2, r2, r4\n\t"
        "BFI	r10, r11, #0, #16\n\t"
        "BFI	r2, r12, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r12, r10, #16\n\t"
        "MUL	r4, r11, r12\n\t"
        "SBFX	r10, r10, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r4, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r4, r12, r11, r4\n\t"
        "BFI	r4, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SSUB16	r10, r3, r5\n\t"
        "SADD16	r3, r3, r5\n\t"
        "SMULBT	r5, lr, r10\n\t"
        "SMULBB	r10, lr, r10\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r5\n\t"
        "SMLABB	r5, r12, r11, r5\n\t"
        "PKHTB	r5, r5, r10, ASR #16\n\t"
#else
        "SUB	r11, r3, r5\n\t"
        "ADD	r12, r3, r5\n\t"
        "BFC	r5, #0, #16\n\t"
        "BFC	r3, #0, #16\n\t"
        "SUB	r10, r3, r5\n\t"
        "ADD	r3, r3, r5\n\t"
        "BFI	r10, r11, #0, #16\n\t"
        "BFI	r3, r12, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r12, r10, #16\n\t"
        "MUL	r5, r11, r12\n\t"
        "SBFX	r10, r10, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r5, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r5, r12, r11, r5\n\t"
        "BFI	r5, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SSUB16	r10, r6, r8\n\t"
        "SADD16	r6, r6, r8\n\t"
        "SMULTT	r8, lr, r10\n\t"
        "SMULTB	r10, lr, r10\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r8\n\t"
        "SMLABB	r8, r12, r11, r8\n\t"
        "PKHTB	r8, r8, r10, ASR #16\n\t"
#else
        "SUB	r11, r6, r8\n\t"
        "ADD	r12, r6, r8\n\t"
        "BFC	r8, #0, #16\n\t"
        "BFC	r6, #0, #16\n\t"
        "SUB	r10, r6, r8\n\t"
        "ADD	r6, r6, r8\n\t"
        "BFI	r10, r11, #0, #16\n\t"
        "BFI	r6, r12, #0, #16\n\t"
        "SBFX	r11, lr, #16, #16\n\t"
        "ASR	r12, r10, #16\n\t"
        "MUL	r8, r11, r12\n\t"
        "SBFX	r10, r10, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r8, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r8, r12, r11, r8\n\t"
        "BFI	r8, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SSUB16	r10, r7, r9\n\t"
        "SADD16	r7, r7, r9\n\t"
        "SMULTT	r9, lr, r10\n\t"
        "SMULTB	r10, lr, r10\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r9\n\t"
        "SMLABB	r9, r12, r11, r9\n\t"
        "PKHTB	r9, r9, r10, ASR #16\n\t"
#else
        "SUB	r11, r7, r9\n\t"
        "ADD	r12, r7, r9\n\t"
        "BFC	r9, #0, #16\n\t"
        "BFC	r7, #0, #16\n\t"
        "SUB	r10, r7, r9\n\t"
        "ADD	r7, r7, r9\n\t"
        "BFI	r10, r11, #0, #16\n\t"
        "BFI	r7, r12, #0, #16\n\t"
        "SBFX	r11, lr, #16, #16\n\t"
        "ASR	r12, r10, #16\n\t"
        "MUL	r9, r11, r12\n\t"
        "SBFX	r10, r10, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r9, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r9, r12, r11, r9\n\t"
        "BFI	r9, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "MOV	lr, #0xafc0\n\t"
        "MOVT	lr, #0x13\n\t"
#else
        "MOV	lr, #0x4ebf\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULWB	r10, lr, r2\n\t"
        "SMULWT	r11, lr, r2\n\t"
        "SMULBT	r10, r12, r10\n\t"
        "SMULBT	r11, r12, r11\n\t"
        "PKHBT	r10, r10, r11, LSL #16\n\t"
        "SSUB16	r2, r2, r10\n\t"
#else
        "SBFX	r10, r2, #0, #16\n\t"
        "SBFX	r11, r2, #16, #16\n\t"
        "MUL	r10, lr, r10\n\t"
        "MUL	r11, lr, r11\n\t"
        "ASR	r10, r10, #26\n\t"
        "ASR	r11, r11, #26\n\t"
        "MUL	r10, r12, r10\n\t"
        "MUL	r11, r12, r11\n\t"
        "SUB	r11, r2, r11, LSL #16\n\t"
        "SUB	r2, r2, r10\n\t"
        "LSR	r11, r11, #16\n\t"
        "BFI	r2, r11, #16, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULWB	r10, lr, r3\n\t"
        "SMULWT	r11, lr, r3\n\t"
        "SMULBT	r10, r12, r10\n\t"
        "SMULBT	r11, r12, r11\n\t"
        "PKHBT	r10, r10, r11, LSL #16\n\t"
        "SSUB16	r3, r3, r10\n\t"
#else
        "SBFX	r10, r3, #0, #16\n\t"
        "SBFX	r11, r3, #16, #16\n\t"
        "MUL	r10, lr, r10\n\t"
        "MUL	r11, lr, r11\n\t"
        "ASR	r10, r10, #26\n\t"
        "ASR	r11, r11, #26\n\t"
        "MUL	r10, r12, r10\n\t"
        "MUL	r11, r12, r11\n\t"
        "SUB	r11, r3, r11, LSL #16\n\t"
        "SUB	r3, r3, r10\n\t"
        "LSR	r11, r11, #16\n\t"
        "BFI	r3, r11, #16, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULWB	r10, lr, r4\n\t"
        "SMULWT	r11, lr, r4\n\t"
        "SMULBT	r10, r12, r10\n\t"
        "SMULBT	r11, r12, r11\n\t"
        "PKHBT	r10, r10, r11, LSL #16\n\t"
        "SSUB16	r4, r4, r10\n\t"
#else
        "SBFX	r10, r4, #0, #16\n\t"
        "SBFX	r11, r4, #16, #16\n\t"
        "MUL	r10, lr, r10\n\t"
        "MUL	r11, lr, r11\n\t"
        "ASR	r10, r10, #26\n\t"
        "ASR	r11, r11, #26\n\t"
        "MUL	r10, r12, r10\n\t"
        "MUL	r11, r12, r11\n\t"
        "SUB	r11, r4, r11, LSL #16\n\t"
        "SUB	r4, r4, r10\n\t"
        "LSR	r11, r11, #16\n\t"
        "BFI	r4, r11, #16, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULWB	r10, lr, r5\n\t"
        "SMULWT	r11, lr, r5\n\t"
        "SMULBT	r10, r12, r10\n\t"
        "SMULBT	r11, r12, r11\n\t"
        "PKHBT	r10, r10, r11, LSL #16\n\t"
        "SSUB16	r5, r5, r10\n\t"
#else
        "SBFX	r10, r5, #0, #16\n\t"
        "SBFX	r11, r5, #16, #16\n\t"
        "MUL	r10, lr, r10\n\t"
        "MUL	r11, lr, r11\n\t"
        "ASR	r10, r10, #26\n\t"
        "ASR	r11, r11, #26\n\t"
        "MUL	r10, r12, r10\n\t"
        "MUL	r11, r12, r11\n\t"
        "SUB	r11, r5, r11, LSL #16\n\t"
        "SUB	r5, r5, r10\n\t"
        "LSR	r11, r11, #16\n\t"
        "BFI	r5, r11, #16, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
        "LDR	lr, [r1, #252]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
        "SSUB16	r10, r2, r6\n\t"
        "SADD16	r2, r2, r6\n\t"
        "SMULBT	r6, lr, r10\n\t"
        "SMULBB	r10, lr, r10\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r6\n\t"
        "SMLABB	r6, r12, r11, r6\n\t"
        "PKHTB	r6, r6, r10, ASR #16\n\t"
#else
        "SUB	r11, r2, r6\n\t"
        "ADD	r12, r2, r6\n\t"
        "BFC	r6, #0, #16\n\t"
        "BFC	r2, #0, #16\n\t"
        "SUB	r10, r2, r6\n\t"
        "ADD	r2, r2, r6\n\t"
        "BFI	r10, r11, #0, #16\n\t"
        "BFI	r2, r12, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r12, r10, #16\n\t"
        "MUL	r6, r11, r12\n\t"
        "SBFX	r10, r10, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r6, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r6, r12, r11, r6\n\t"
        "BFI	r6, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SSUB16	r10, r3, r7\n\t"
        "SADD16	r3, r3, r7\n\t"
        "SMULBT	r7, lr, r10\n\t"
        "SMULBB	r10, lr, r10\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r7\n\t"
        "SMLABB	r7, r12, r11, r7\n\t"
        "PKHTB	r7, r7, r10, ASR #16\n\t"
#else
        "SUB	r11, r3, r7\n\t"
        "ADD	r12, r3, r7\n\t"
        "BFC	r7, #0, #16\n\t"
        "BFC	r3, #0, #16\n\t"
        "SUB	r10, r3, r7\n\t"
        "ADD	r3, r3, r7\n\t"
        "BFI	r10, r11, #0, #16\n\t"
        "BFI	r3, r12, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r12, r10, #16\n\t"
        "MUL	r7, r11, r12\n\t"
        "SBFX	r10, r10, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r7, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r7, r12, r11, r7\n\t"
        "BFI	r7, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SSUB16	r10, r4, r8\n\t"
        "SADD16	r4, r4, r8\n\t"
        "SMULBT	r8, lr, r10\n\t"
        "SMULBB	r10, lr, r10\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r8\n\t"
        "SMLABB	r8, r12, r11, r8\n\t"
        "PKHTB	r8, r8, r10, ASR #16\n\t"
#else
        "SUB	r11, r4, r8\n\t"
        "ADD	r12, r4, r8\n\t"
        "BFC	r8, #0, #16\n\t"
        "BFC	r4, #0, #16\n\t"
        "SUB	r10, r4, r8\n\t"
        "ADD	r4, r4, r8\n\t"
        "BFI	r10, r11, #0, #16\n\t"
        "BFI	r4, r12, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r12, r10, #16\n\t"
        "MUL	r8, r11, r12\n\t"
        "SBFX	r10, r10, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r8, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r8, r12, r11, r8\n\t"
        "BFI	r8, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SSUB16	r10, r5, r9\n\t"
        "SADD16	r5, r5, r9\n\t"
        "SMULBT	r9, lr, r10\n\t"
        "SMULBB	r10, lr, r10\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r9\n\t"
        "SMLABB	r9, r12, r11, r9\n\t"
        "PKHTB	r9, r9, r10, ASR #16\n\t"
#else
        "SUB	r11, r5, r9\n\t"
        "ADD	r12, r5, r9\n\t"
        "BFC	r9, #0, #16\n\t"
        "BFC	r5, #0, #16\n\t"
        "SUB	r10, r5, r9\n\t"
        "ADD	r5, r5, r9\n\t"
        "BFI	r10, r11, #0, #16\n\t"
        "BFI	r5, r12, #0, #16\n\t"
        "SBFX	r11, lr, #0, #16\n\t"
        "ASR	r12, r10, #16\n\t"
        "MUL	r9, r11, r12\n\t"
        "SBFX	r10, r10, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r9, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r9, r12, r11, r9\n\t"
        "BFI	r9, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
        "LDR	lr, [r1, #254]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULBB	r10, lr, r2\n\t"
        "SMULBT	r2, lr, r2\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r2\n\t"
        "SMLABB	r2, r12, r11, r2\n\t"
        "PKHTB	r2, r2, r10, ASR #16\n\t"
#else
        "SBFX	r11, lr, #0, #16\n\t"
        "SBFX	r10, r2, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "SBFX	r2, r2, #16, #16\n\t"
        "MUL	r2, r11, r2\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r2, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r2, r12, r11, r2\n\t"
        "BFI	r2, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULBB	r10, lr, r3\n\t"
        "SMULBT	r3, lr, r3\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r3\n\t"
        "SMLABB	r3, r12, r11, r3\n\t"
        "PKHTB	r3, r3, r10, ASR #16\n\t"
#else
        "SBFX	r11, lr, #0, #16\n\t"
        "SBFX	r10, r3, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "SBFX	r3, r3, #16, #16\n\t"
        "MUL	r3, r11, r3\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r3, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r3, r12, r11, r3\n\t"
        "BFI	r3, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULBB	r10, lr, r4\n\t"
        "SMULBT	r4, lr, r4\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r4\n\t"
        "SMLABB	r4, r12, r11, r4\n\t"
        "PKHTB	r4, r4, r10, ASR #16\n\t"
#else
        "SBFX	r11, lr, #0, #16\n\t"
        "SBFX	r10, r4, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "SBFX	r4, r4, #16, #16\n\t"
        "MUL	r4, r11, r4\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r4, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r4, r12, r11, r4\n\t"
        "BFI	r4, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULBB	r10, lr, r5\n\t"
        "SMULBT	r5, lr, r5\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r5\n\t"
        "SMLABB	r5, r12, r11, r5\n\t"
        "PKHTB	r5, r5, r10, ASR #16\n\t"
#else
        "SBFX	r11, lr, #0, #16\n\t"
        "SBFX	r10, r5, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "SBFX	r5, r5, #16, #16\n\t"
        "MUL	r5, r11, r5\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r5, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r5, r12, r11, r5\n\t"
        "BFI	r5, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULBB	r10, lr, r6\n\t"
        "SMULBT	r6, lr, r6\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r6\n\t"
        "SMLABB	r6, r12, r11, r6\n\t"
        "PKHTB	r6, r6, r10, ASR #16\n\t"
#else
        "SBFX	r11, lr, #0, #16\n\t"
        "SBFX	r10, r6, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "SBFX	r6, r6, #16, #16\n\t"
        "MUL	r6, r11, r6\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r6, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r6, r12, r11, r6\n\t"
        "BFI	r6, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULBB	r10, lr, r7\n\t"
        "SMULBT	r7, lr, r7\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r7\n\t"
        "SMLABB	r7, r12, r11, r7\n\t"
        "PKHTB	r7, r7, r10, ASR #16\n\t"
#else
        "SBFX	r11, lr, #0, #16\n\t"
        "SBFX	r10, r7, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "SBFX	r7, r7, #16, #16\n\t"
        "MUL	r7, r11, r7\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r7, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r7, r12, r11, r7\n\t"
        "BFI	r7, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULBB	r10, lr, r8\n\t"
        "SMULBT	r8, lr, r8\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r8\n\t"
        "SMLABB	r8, r12, r11, r8\n\t"
        "PKHTB	r8, r8, r10, ASR #16\n\t"
#else
        "SBFX	r11, lr, #0, #16\n\t"
        "SBFX	r10, r8, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "SBFX	r8, r8, #16, #16\n\t"
        "MUL	r8, r11, r8\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r8, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r8, r12, r11, r8\n\t"
        "BFI	r8, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_ARM_ARCH_7M
        "SMULBB	r10, lr, r9\n\t"
        "SMULBT	r9, lr, r9\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        "SMULTB	r11, r12, r9\n\t"
        "SMLABB	r9, r12, r11, r9\n\t"
        "PKHTB	r9, r9, r10, ASR #16\n\t"
#else
        "SBFX	r11, lr, #0, #16\n\t"
        "SBFX	r10, r9, #0, #16\n\t"
        "MUL	r10, r11, r10\n\t"
        "SBFX	r9, r9, #16, #16\n\t"
        "MUL	r9, r11, r9\n\t"
        "MOV	r12, #0xcff\n\t"
        "MUL	r11, r12, r10\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        "MOV	r12, #0xcff\n\t"
        "SBFX	r11, r9, #0, #16\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "LSR	r10, r10, #16\n\t"
        "MLA	r9, r12, r11, r9\n\t"
        "BFI	r9, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
        "STR	r2, [%[r]]\n\t"
        "STR	r3, [%[r], #64]\n\t"
        "STR	r4, [%[r], #128]\n\t"
        "STR	r5, [%[r], #192]\n\t"
        "STR	r6, [%[r], #256]\n\t"
        "STR	r7, [%[r], #320]\n\t"
        "STR	r8, [%[r], #384]\n\t"
        "STR	r9, [%[r], #448]\n\t"
        "LDR	r2, [sp]\n\t"
        "SUBS	r2, r2, #0x1\n\t"
        "ADD	%[r], %[r], #0x4\n\t"
#if defined(__GNUC__)
        "BNE	L_mlkem_invntt_loop_321_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BNE.N	L_mlkem_invntt_loop_321\n\t"
#else
        "BNE.N	L_mlkem_invntt_loop_321_%=\n\t"
#endif
        "ADD	sp, sp, #0x8\n\t"
        : [r] "+r" (r),
          [L_mlkem_invntt_zetas_inv] "+r" (L_mlkem_invntt_zetas_inv_c)
        :
        : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
            "r11", "r12", "lr"
    );
}

/* Table of 128 16-bit constants (16 rows of 8) used by the base
 * multiplication routines below.
 *
 * mlkem_thumb2_basemul_mont() and mlkem_thumb2_basemul_mont_add() add 0x80
 * bytes to the table address (i.e. start at entry 64) and step forward by one
 * 16-bit entry per loop iteration.
 * mlkem_thumb2_csubq() and mlkem_thumb2_rej_uniform() pass the table address
 * as an asm operand but their assembly never reads from it.
 */
XALIGNED(16) static const word16 L_mlkem_basemul_mont_zetas[] = {
    0x08ed, 0x0a0b, 0x0b9a, 0x0714, 0x05d5, 0x058e, 0x011f, 0x00ca,
    0x0c56, 0x026e, 0x0629, 0x00b6, 0x03c2, 0x084f, 0x073f, 0x05bc,
    0x023d, 0x07d4, 0x0108, 0x017f, 0x09c4, 0x05b2, 0x06bf, 0x0c7f,
    0x0a58, 0x03f9, 0x02dc, 0x0260, 0x06fb, 0x019b, 0x0c34, 0x06de,
    0x04c7, 0x028c, 0x0ad9, 0x03f7, 0x07f4, 0x05d3, 0x0be7, 0x06f9,
    0x0204, 0x0cf9, 0x0bc1, 0x0a67, 0x06af, 0x0877, 0x007e, 0x05bd,
    0x09ac, 0x0ca7, 0x0bf2, 0x033e, 0x006b, 0x0774, 0x0c0a, 0x094a,
    0x0b73, 0x03c1, 0x071d, 0x0a2c, 0x01c0, 0x08d8, 0x02a5, 0x0806,
    0x08b2, 0x01ae, 0x022b, 0x034b, 0x081e, 0x0367, 0x060e, 0x0069,
    0x01a6, 0x024b, 0x00b1, 0x0c16, 0x0bde, 0x0b35, 0x0626, 0x0675,
    0x0c0b, 0x030a, 0x0487, 0x0c6e, 0x09f8, 0x05cb, 0x0aa7, 0x045f,
    0x06cb, 0x0284, 0x0999, 0x015d, 0x01a2, 0x0149, 0x0c65, 0x0cb6,
    0x0331, 0x0449, 0x025b, 0x0262, 0x052a, 0x07fc, 0x0748, 0x0180,
    0x0842, 0x0c79, 0x04c2, 0x07ca, 0x0997, 0x00dc, 0x085e, 0x0686,
    0x0860, 0x0707, 0x0803, 0x031a, 0x071b, 0x09ab, 0x099b, 0x01de,
    0x0c95, 0x0bcd, 0x03e4, 0x03df, 0x03be, 0x074d, 0x05f2, 0x065c,
};

/* Pair-wise base multiplication of two polynomials with reduction: r = a o b.
 *
 * Each loop iteration consumes four 16-bit coefficients of a and of b (two
 * packed 32-bit words each) together with one 16-bit zeta and writes four
 * coefficients to r:
 *   r[0] = red(a[0] * b[0] + zeta * red(a[1] * b[1]))
 *   r[1] = red(a[0] * b[1] + a[1] * b[0])
 *   r[2] = red(a[2] * b[2] - zeta * red(a[3] * b[3]))
 *   r[3] = red(a[2] * b[3] + a[3] * b[2])
 * where red(x) = (x + 0xd01 * (int16)(0xcff * x)) >> 16
 * (0xd01 * 0xcff == -1 mod 2^16, so the low 16 bits of the sum are zero).
 *
 * Zetas are read from L_mlkem_basemul_mont_zetas starting at byte offset 0x80
 * (entry 64). The loop runs 64 times, i.e. 256 coefficients in total.
 *
 * The zeta is fetched with LDRH: only its low 16 bits are ever used, and a
 * 32-bit LDR at a 2-byte stride would be unaligned on every other iteration
 * and would read 2 bytes beyond the end of the table on the last one.
 *
 * @param  [out]  r  Result coefficients (256 16-bit values are written).
 * @param  [in]   a  First operand (256 16-bit values are read).
 * @param  [in]   b  Second operand (256 16-bit values are read).
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void mlkem_thumb2_basemul_mont(sword16* r_p, const sword16* a_p,
    const sword16* b_p)
#else
void mlkem_thumb2_basemul_mont(sword16* r, const sword16* a, const sword16* b)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register sword16* r __asm__ ("r0") = (sword16*)r_p;
    register const sword16* a __asm__ ("r1") = (const sword16*)a_p;
    register const sword16* b __asm__ ("r2") = (const sword16*)b_p;
    register word16* L_mlkem_basemul_mont_zetas_c __asm__ ("r3") =
        (word16*)&L_mlkem_basemul_mont_zetas;

#else
    register word16* L_mlkem_basemul_mont_zetas_c =
        (word16*)&L_mlkem_basemul_mont_zetas;

#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        /* r3 = &zetas[64] */
        "MOV	r3, %[L_mlkem_basemul_mont_zetas]\n\t"
        "ADD	r3, r3, #0x80\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
        /* r12 = 0x0cff0d01: top half 0xcff, bottom half 0xd01. */
        "MOV	r12, #0xd01\n\t"
        "MOVT	r12, #0xcff\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
        /* r8 = byte offset of the next zeta. */
        "MOV	r8, #0x0\n\t"
        "\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
    "L_mlkem_basemul_mont_loop:\n\t"
#else
    "L_mlkem_basemul_mont_loop_%=:\n\t"
#endif
        /* r4 = a[1]:a[0], r5 = a[3]:a[2], r6 = b[1]:b[0], r7 = b[3]:b[2] */
        "LDM	%[a]!, {r4, r5}\n\t"
        "LDM	%[b]!, {r6, r7}\n\t"
        /* lr = zeta (16-bit load; aligned and within the table). */
        "LDRH	lr, [r3, r8]\n\t"
        "ADD	r8, r8, #0x2\n\t"
        /* r8 is reused as scratch below, so save the offset on the stack. */
        "PUSH	{r8}\n\t"
        /* Loop test is done here; nothing below changes the flags before the
         * BNE at the bottom. */
        "CMP	r8, #0x80\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
        /* r8 = a[1] * b[1], r10 = a[3] * b[3] */
        "SMULTT	r8, r4, r6\n\t"
        "SMULTT	r10, r5, r7\n\t"
        /* Reduce both: result is left in the top half of r8 / r10. */
        "SMULTB	r9, r12, r8\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r8, r12, r9, r8\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        /* r11 = -zeta for the second coefficient pair. */
        "RSB	r11, lr, #0x0\n\t"
        /* Multiply reduced products by zeta / -zeta. */
        "SMULBT	r8, lr, r8\n\t"
        "SMULBT	r10, r11, r10\n\t"
        /* Add a[0] * b[0] and a[2] * b[2]. */
        "SMLABB	r8, r4, r6, r8\n\t"
        "SMLABB	r10, r5, r7, r10\n\t"
        /* Reduce: r[0] in top of r8, r[2] in top of r10. */
        "SMULTB	r9, r12, r8\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r8, r12, r9, r8\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        /* r9 = a[0] * b[1] + a[1] * b[0], r11 = a[2] * b[3] + a[3] * b[2] */
        "SMULBT	r9, r4, r6\n\t"
        "SMULBT	r11, r5, r7\n\t"
        "SMLATB	r9, r4, r6, r9\n\t"
        "SMLATB	r11, r5, r7, r11\n\t"
        /* Reduce: r[1] in top of r9, r[3] in top of r11. */
        "SMULTB	r6, r12, r9\n\t"
        "SMULTB	r7, r12, r11\n\t"
        "SMLABB	r9, r12, r6, r9\n\t"
        "SMLABB	r11, r12, r7, r11\n\t"
        /* Pack r[1]:r[0] into r4 and r[3]:r[2] into r5. */
        "PKHTB	r4, r9, r8, ASR #16\n\t"
        "PKHTB	r5, r11, r10, ASR #16\n\t"
#else
        /* r8 = a[1] * b[1], r10 = a[3] * b[3] */
        "ASR	r8, r4, #16\n\t"
        "ASR	r10, r5, #16\n\t"
        "ASR	r9, r6, #16\n\t"
        "ASR	r11, r7, #16\n\t"
        "MUL	r8, r8, r9\n\t"
        "MUL	r10, r10, r11\n\t"
        /* Reduce both products. Only the low 16 bits of the 0xcff multiple
         * are kept, so using the full r8 in the first MUL gives the same
         * value as using its sign-extended low half. */
        "MOV	r12, #0xcff\n\t"
        "SBFX	r9, r8, #0, #16\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r9, r12, r8\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r9, r9, #0, #16\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r8, r12, r9, r8\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        /* r9 = zeta, r11 = -zeta (both sign-extended from 16 bits). */
        "RSB	r11, lr, #0x0\n\t"
        "SBFX	r9, lr, #0, #16\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        /* Multiply reduced products by zeta / -zeta. */
        "ASR	r8, r8, #16\n\t"
        "ASR	r10, r10, #16\n\t"
        "MUL	r8, r9, r8\n\t"
        "MUL	r10, r11, r10\n\t"
        /* Add a[0] * b[0] and a[2] * b[2]. */
        "SBFX	r9, r4, #0, #16\n\t"
        "SBFX	r11, r5, #0, #16\n\t"
        "SBFX	r12, r6, #0, #16\n\t"
        "MLA	r8, r9, r12, r8\n\t"
        "SBFX	r12, r7, #0, #16\n\t"
        "MLA	r10, r11, r12, r10\n\t"
        /* Reduce: r[0] in top of r8, r[2] in top of r10. */
        "MOV	r12, #0xcff\n\t"
        "SBFX	r9, r8, #0, #16\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r9, r12, r9\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r9, r9, #0, #16\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r8, r12, r9, r8\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        /* r9 = a[0] * b[1] + a[1] * b[0], r11 = a[2] * b[3] + a[3] * b[2] */
        "SBFX	r9, r4, #0, #16\n\t"
        "SBFX	r11, r5, #0, #16\n\t"
        "ASR	r12, r6, #16\n\t"
        "MUL	r9, r9, r12\n\t"
        "ASR	r12, r7, #16\n\t"
        "MUL	r11, r11, r12\n\t"
        "ASR	r4, r4, #16\n\t"
        "ASR	r5, r5, #16\n\t"
        "SBFX	r12, r6, #0, #16\n\t"
        "MLA	r9, r4, r12, r9\n\t"
        "SBFX	r12, r7, #0, #16\n\t"
        "MLA	r11, r5, r12, r11\n\t"
        /* Reduce: r[1] in top of r9, r[3] in top of r11. */
        "MOV	r12, #0xcff\n\t"
        "SBFX	r6, r9, #0, #16\n\t"
        "SBFX	r7, r11, #0, #16\n\t"
        "MUL	r6, r12, r6\n\t"
        "MUL	r7, r12, r7\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r4, r6, #0, #16\n\t"
        "SBFX	r5, r7, #0, #16\n\t"
        "MLA	r9, r12, r4, r9\n\t"
        "MLA	r11, r12, r5, r11\n\t"
        /* Pack r[1]:r[0] into r4 and r[3]:r[2] into r5. */
        "BFC	r9, #0, #16\n\t"
        "BFC	r11, #0, #16\n\t"
        "ORR	r4, r9, r8, LSR #16\n\t"
        "ORR	r5, r11, r10, LSR #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
        "STM	%[r]!, {r4, r5}\n\t"
        /* Restore the zeta offset; flags still hold the CMP result. */
        "POP	{r8}\n\t"
#if defined(__GNUC__)
        "BNE	L_mlkem_basemul_mont_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BNE.N	L_mlkem_basemul_mont_loop\n\t"
#else
        "BNE.N	L_mlkem_basemul_mont_loop_%=\n\t"
#endif
        : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b),
          [L_mlkem_basemul_mont_zetas] "+r" (L_mlkem_basemul_mont_zetas_c)
        :
        : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11",
            "r12", "lr"
    );
}

/* Pair-wise base multiplication of two polynomials with reduction, added to
 * the values already in r: r += a o b.
 *
 * Each loop iteration consumes four 16-bit coefficients of a and of b (two
 * packed 32-bit words each) together with one 16-bit zeta, computes
 *   t[0] = red(a[0] * b[0] + zeta * red(a[1] * b[1]))
 *   t[1] = red(a[0] * b[1] + a[1] * b[0])
 *   t[2] = red(a[2] * b[2] - zeta * red(a[3] * b[3]))
 *   t[3] = red(a[2] * b[3] + a[3] * b[2])
 * where red(x) = (x + 0xd01 * (int16)(0xcff * x)) >> 16, and stores
 * r[i] + t[i] back to r as independent 16-bit additions.
 *
 * Zetas are read from L_mlkem_basemul_mont_zetas starting at byte offset 0x80
 * (entry 64). The loop runs 64 times, i.e. 256 coefficients in total.
 *
 * The zeta is fetched with LDRH: only its low 16 bits are ever used, and a
 * 32-bit LDR at a 2-byte stride would be unaligned on every other iteration
 * and would read 2 bytes beyond the end of the table on the last one.
 *
 * @param  [in,out]  r  Accumulator (256 16-bit values are read and written).
 * @param  [in]      a  First operand (256 16-bit values are read).
 * @param  [in]      b  Second operand (256 16-bit values are read).
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void mlkem_thumb2_basemul_mont_add(sword16* r_p, const sword16* a_p,
    const sword16* b_p)
#else
void mlkem_thumb2_basemul_mont_add(sword16* r, const sword16* a,
    const sword16* b)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register sword16* r __asm__ ("r0") = (sword16*)r_p;
    register const sword16* a __asm__ ("r1") = (const sword16*)a_p;
    register const sword16* b __asm__ ("r2") = (const sword16*)b_p;
    register word16* L_mlkem_basemul_mont_zetas_c __asm__ ("r3") =
        (word16*)&L_mlkem_basemul_mont_zetas;

#else
    register word16* L_mlkem_basemul_mont_zetas_c =
        (word16*)&L_mlkem_basemul_mont_zetas;

#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        /* r3 = &zetas[64] */
        "MOV	r3, %[L_mlkem_basemul_mont_zetas]\n\t"
        "ADD	r3, r3, #0x80\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
        /* r12 = 0x0cff0d01: top half 0xcff, bottom half 0xd01. */
        "MOV	r12, #0xd01\n\t"
        "MOVT	r12, #0xcff\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
        /* r8 = byte offset of the next zeta. */
        "MOV	r8, #0x0\n\t"
        "\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
    "L_mlkem_thumb2_basemul_mont_add_loop:\n\t"
#else
    "L_mlkem_thumb2_basemul_mont_add_loop_%=:\n\t"
#endif
        /* r4 = a[1]:a[0], r5 = a[3]:a[2], r6 = b[1]:b[0], r7 = b[3]:b[2] */
        "LDM	%[a]!, {r4, r5}\n\t"
        "LDM	%[b]!, {r6, r7}\n\t"
        /* lr = zeta (16-bit load; aligned and within the table). */
        "LDRH	lr, [r3, r8]\n\t"
        "ADD	r8, r8, #0x2\n\t"
        /* r8 is reused as scratch below, so save the offset on the stack. */
        "PUSH	{r8}\n\t"
        /* Loop test is done here; nothing below changes the flags before the
         * BNE at the bottom. */
        "CMP	r8, #0x80\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
        /* r8 = a[1] * b[1], r10 = a[3] * b[3] */
        "SMULTT	r8, r4, r6\n\t"
        "SMULTT	r10, r5, r7\n\t"
        /* Reduce both: result is left in the top half of r8 / r10. */
        "SMULTB	r9, r12, r8\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r8, r12, r9, r8\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        /* r11 = -zeta for the second coefficient pair. */
        "RSB	r11, lr, #0x0\n\t"
        /* Multiply reduced products by zeta / -zeta. */
        "SMULBT	r8, lr, r8\n\t"
        "SMULBT	r10, r11, r10\n\t"
        /* Add a[0] * b[0] and a[2] * b[2]. */
        "SMLABB	r8, r4, r6, r8\n\t"
        "SMLABB	r10, r5, r7, r10\n\t"
        /* Reduce: t[0] in top of r8, t[2] in top of r10. */
        "SMULTB	r9, r12, r8\n\t"
        "SMULTB	r11, r12, r10\n\t"
        "SMLABB	r8, r12, r9, r8\n\t"
        "SMLABB	r10, r12, r11, r10\n\t"
        /* r9 = a[0] * b[1] + a[1] * b[0], r11 = a[2] * b[3] + a[3] * b[2] */
        "SMULBT	r9, r4, r6\n\t"
        "SMULBT	r11, r5, r7\n\t"
        "SMLATB	r9, r4, r6, r9\n\t"
        "SMLATB	r11, r5, r7, r11\n\t"
        /* Reduce: t[1] in top of r9, t[3] in top of r11. */
        "SMULTB	r6, r12, r9\n\t"
        "SMULTB	r7, r12, r11\n\t"
        "SMLABB	r9, r12, r6, r9\n\t"
        "SMLABB	r11, r12, r7, r11\n\t"
        /* Load the current accumulator words (no write-back yet). */
        "LDM	%[r], {r4, r5}\n\t"
        /* Pack t[1]:t[0] into r9 and t[3]:t[2] into r11. */
        "PKHTB	r9, r9, r8, ASR #16\n\t"
        "PKHTB	r11, r11, r10, ASR #16\n\t"
        /* Lane-wise 16-bit add into the accumulator. */
        "SADD16	r4, r4, r9\n\t"
        "SADD16	r5, r5, r11\n\t"
#else
        /* r8 = a[1] * b[1], r10 = a[3] * b[3] */
        "ASR	r8, r4, #16\n\t"
        "ASR	r10, r5, #16\n\t"
        "ASR	r9, r6, #16\n\t"
        "ASR	r11, r7, #16\n\t"
        "MUL	r8, r8, r9\n\t"
        "MUL	r10, r10, r11\n\t"
        /* Reduce both products. Only the low 16 bits of the 0xcff multiple
         * are kept, so using the full r8 in the first MUL gives the same
         * value as using its sign-extended low half. */
        "MOV	r12, #0xcff\n\t"
        "SBFX	r9, r8, #0, #16\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r9, r12, r8\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r9, r9, #0, #16\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r8, r12, r9, r8\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        /* r9 = zeta, r11 = -zeta (both sign-extended from 16 bits). */
        "RSB	r11, lr, #0x0\n\t"
        "SBFX	r9, lr, #0, #16\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        /* Multiply reduced products by zeta / -zeta. */
        "ASR	r8, r8, #16\n\t"
        "ASR	r10, r10, #16\n\t"
        "MUL	r8, r9, r8\n\t"
        "MUL	r10, r11, r10\n\t"
        /* Add a[0] * b[0] and a[2] * b[2]. */
        "SBFX	r9, r4, #0, #16\n\t"
        "SBFX	r11, r5, #0, #16\n\t"
        "SBFX	r12, r6, #0, #16\n\t"
        "MLA	r8, r9, r12, r8\n\t"
        "SBFX	r12, r7, #0, #16\n\t"
        "MLA	r10, r11, r12, r10\n\t"
        /* Reduce: t[0] in top of r8, t[2] in top of r10. */
        "MOV	r12, #0xcff\n\t"
        "SBFX	r9, r8, #0, #16\n\t"
        "SBFX	r11, r10, #0, #16\n\t"
        "MUL	r9, r12, r9\n\t"
        "MUL	r11, r12, r11\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r9, r9, #0, #16\n\t"
        "SBFX	r11, r11, #0, #16\n\t"
        "MLA	r8, r12, r9, r8\n\t"
        "MLA	r10, r12, r11, r10\n\t"
        /* r9 = a[0] * b[1] + a[1] * b[0], r11 = a[2] * b[3] + a[3] * b[2] */
        "SBFX	r9, r4, #0, #16\n\t"
        "SBFX	r11, r5, #0, #16\n\t"
        "ASR	r12, r6, #16\n\t"
        "MUL	r9, r9, r12\n\t"
        "ASR	r12, r7, #16\n\t"
        "MUL	r11, r11, r12\n\t"
        "ASR	r4, r4, #16\n\t"
        "ASR	r5, r5, #16\n\t"
        "SBFX	r12, r6, #0, #16\n\t"
        "MLA	r9, r4, r12, r9\n\t"
        "SBFX	r12, r7, #0, #16\n\t"
        "MLA	r11, r5, r12, r11\n\t"
        /* Reduce: t[1] in top of r9, t[3] in top of r11. */
        "MOV	r12, #0xcff\n\t"
        "SBFX	r6, r9, #0, #16\n\t"
        "SBFX	r7, r11, #0, #16\n\t"
        "MUL	r6, r12, r6\n\t"
        "MUL	r7, r12, r7\n\t"
        "MOV	r12, #0xd01\n\t"
        "SBFX	r4, r6, #0, #16\n\t"
        "SBFX	r5, r7, #0, #16\n\t"
        "MLA	r9, r12, r4, r9\n\t"
        "MLA	r11, r12, r5, r11\n\t"
        /* Load the current accumulator words (no write-back yet). */
        "LDM	%[r], {r4, r5}\n\t"
        /* Pack t[1]:t[0] into r9 and t[3]:t[2] into r11. */
        "BFC	r9, #0, #16\n\t"
        "BFC	r11, #0, #16\n\t"
        "ORR	r9, r9, r8, LSR #16\n\t"
        "ORR	r11, r11, r10, LSR #16\n\t"
        /* Lane-wise 16-bit add without SADD16: the low lane sum is taken
         * from a full add, the high lane sum from an add with the low lane
         * of the addend cleared; BFI merges the two. */
        "ADD	r8, r4, r9\n\t"
        "ADD	r10, r5, r11\n\t"
        "BFC	r9, #0, #16\n\t"
        "BFC	r11, #0, #16\n\t"
        "ADD	r4, r4, r9\n\t"
        "ADD	r5, r5, r11\n\t"
        "BFI	r4, r8, #0, #16\n\t"
        "BFI	r5, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
        "STM	%[r]!, {r4, r5}\n\t"
        /* Restore the zeta offset; flags still hold the CMP result. */
        "POP	{r8}\n\t"
#if defined(__GNUC__)
        "BNE	L_mlkem_thumb2_basemul_mont_add_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BNE.N	L_mlkem_thumb2_basemul_mont_add_loop\n\t"
#else
        "BNE.N	L_mlkem_thumb2_basemul_mont_add_loop_%=\n\t"
#endif
        : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b),
          [L_mlkem_basemul_mont_zetas] "+r" (L_mlkem_basemul_mont_zetas_c)
        :
        : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11",
            "r12", "lr"
    );
}

/* Conditionally subtract 0xd01 (3329) from each of 256 16-bit values.
 *
 * For every value x the code computes x - 0xd01 and, when that result has its
 * sign bit set, adds 0xd01 back. Eight values (four packed 32-bit words) are
 * processed per loop iteration; the loop runs 32 times.
 *
 * The zetas table operand is not read by the assembly. With register
 * assignment enabled it is bound to r1, which the assembly uses as its loop
 * counter.
 *
 * @param  [in,out]  p  256 16-bit values, updated in place.
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void mlkem_thumb2_csubq(sword16* p_p)
#else
void mlkem_thumb2_csubq(sword16* p)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register sword16* p __asm__ ("r0") = (sword16*)p_p;
    register word16* L_mlkem_basemul_mont_zetas_c __asm__ ("r1") =
        (word16*)&L_mlkem_basemul_mont_zetas;

#else
    register word16* L_mlkem_basemul_mont_zetas_c =
        (word16*)&L_mlkem_basemul_mont_zetas;

#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        /* r11 = 0xd01 (multiplier used to rebuild the add-back value). */
        "MOV	r11, #0xd01\n\t"
        /* r12 = 0xd01 in both 16-bit lanes (low lane only on ARMv7-M, where
         * the high lane is handled with an LSL #16 operand instead). */
        "MOV	r12, #0xd01\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
        "MOVT	r12, #0xd01\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
        /* lr = 0x80008000: sign-bit mask for both 16-bit lanes. */
        "MOV	lr, #0x8000\n\t"
        "MOVT	lr, #0x8000\n\t"
        /* r1 = number of values left to process (256). */
        "MOV	r1, #0x100\n\t"
        "\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
    "L_mlkem_thumb2_csubq_loop:\n\t"
#else
    "L_mlkem_thumb2_csubq_loop_%=:\n\t"
#endif
        /* Load eight values; the pointer is advanced by the STM below. */
        "LDM	%[p], {r2, r3, r4, r5}\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
        /* Lane-wise x - 0xd01. */
        "SSUB16	r2, r2, r12\n\t"
        "SSUB16	r3, r3, r12\n\t"
        "SSUB16	r4, r4, r12\n\t"
        "SSUB16	r5, r5, r12\n\t"
        /* Isolate each lane's sign bit ... */
        "AND	r6, r2, lr\n\t"
        "AND	r7, r3, lr\n\t"
        "AND	r8, r4, lr\n\t"
        "AND	r9, r5, lr\n\t"
        /* ... move it to bit 0 of the lane (1 when the lane is negative) ... */
        "LSR	r6, r6, #15\n\t"
        "LSR	r7, r7, #15\n\t"
        "LSR	r8, r8, #15\n\t"
        "LSR	r9, r9, #15\n\t"
        /* ... and scale to 0xd01 in every negative lane, 0 elsewhere. */
        "MUL	r6, r6, r11\n\t"
        "MUL	r7, r7, r11\n\t"
        "MUL	r8, r8, r11\n\t"
        "MUL	r9, r9, r11\n\t"
        /* Add 0xd01 back to the lanes that went negative. */
        "SADD16	r2, r2, r6\n\t"
        "SADD16	r3, r3, r7\n\t"
        "SADD16	r4, r4, r8\n\t"
        "SADD16	r5, r5, r9\n\t"
#else
        /* Lane-wise x - 0xd01 without SSUB16: low lane from a plain SUB,
         * high lane from a SUB of 0xd01 << 16, merged with BFI. */
        "SUB	r6, r2, r12\n\t"
        "SUB	r2, r2, r12, LSL #16\n\t"
        "BFI	r2, r6, #0, #16\n\t"
        "SUB	r7, r3, r12\n\t"
        "SUB	r3, r3, r12, LSL #16\n\t"
        "BFI	r3, r7, #0, #16\n\t"
        "SUB	r8, r4, r12\n\t"
        "SUB	r4, r4, r12, LSL #16\n\t"
        "BFI	r4, r8, #0, #16\n\t"
        "SUB	r9, r5, r12\n\t"
        "SUB	r5, r5, r12, LSL #16\n\t"
        "BFI	r5, r9, #0, #16\n\t"
        /* Build 0xd01 in every negative lane, 0 elsewhere. */
        "AND	r6, r2, lr\n\t"
        "AND	r7, r3, lr\n\t"
        "AND	r8, r4, lr\n\t"
        "AND	r9, r5, lr\n\t"
        "LSR	r6, r6, #15\n\t"
        "LSR	r7, r7, #15\n\t"
        "LSR	r8, r8, #15\n\t"
        "LSR	r9, r9, #15\n\t"
        "MUL	r6, r6, r11\n\t"
        "MUL	r7, r7, r11\n\t"
        "MUL	r8, r8, r11\n\t"
        "MUL	r9, r9, r11\n\t"
        /* Lane-wise add without SADD16: low lane sum from a full ADD (r10),
         * high lane sum from an ADD with the addend's low lane cleared. */
        "ADD	r10, r2, r6\n\t"
        "BFC	r6, #0, #16\n\t"
        "ADD	r2, r2, r6\n\t"
        "BFI	r2, r10, #0, #16\n\t"
        "ADD	r10, r3, r7\n\t"
        "BFC	r7, #0, #16\n\t"
        "ADD	r3, r3, r7\n\t"
        "BFI	r3, r10, #0, #16\n\t"
        "ADD	r10, r4, r8\n\t"
        "BFC	r8, #0, #16\n\t"
        "ADD	r4, r4, r8\n\t"
        "BFI	r4, r10, #0, #16\n\t"
        "ADD	r10, r5, r9\n\t"
        "BFC	r9, #0, #16\n\t"
        "ADD	r5, r5, r9\n\t"
        "BFI	r5, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
        /* Store the eight results and advance p. */
        "STM	%[p]!, {r2, r3, r4, r5}\n\t"
        "SUBS	r1, r1, #0x8\n\t"
#if defined(__GNUC__)
        "BNE	L_mlkem_thumb2_csubq_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BNE.N	L_mlkem_thumb2_csubq_loop\n\t"
#else
        "BNE.N	L_mlkem_thumb2_csubq_loop_%=\n\t"
#endif
        : [p] "+r" (p),
          [L_mlkem_basemul_mont_zetas] "+r" (L_mlkem_basemul_mont_zetas_c)
        :
        : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
            "r11", "r12", "lr"
    );
}

/* Rejection sampling: unpack 12-bit values from a byte stream and keep those
 * that are less than 0xd01 (3329).
 *
 * Input is consumed 12 bytes (three 32-bit words, eight 12-bit candidates) at
 * a time. Two loops are used:
 *  - While at least 8 more values are wanted, a branch-free loop stores every
 *    candidate at the current output position and only advances the position
 *    (and decrements len) when the candidate is below 0xd01, so a rejected
 *    candidate is overwritten by the next store. This loop ends only when
 *    rLen reaches exactly 0 or len drops below 8.
 *    NOTE(review): this appears to require rLen to be a non-zero multiple of
 *    12 on entry - confirm against callers.
 *  - Otherwise a branching loop stores only accepted candidates and stops as
 *    soon as len reaches 0 or rLen is no longer positive.
 *
 * The zetas table operand is not read by the assembly. With register
 * assignment enabled it is bound to r4, which the assembly uses as a data
 * register (r4 is therefore not in the clobber list).
 *
 * @param  [out]  p     Buffer for accepted 16-bit values.
 * @param  [in]   len   Maximum number of values to store.
 * @param  [in]   r     Input bytes; read with 32-bit word loads.
 * @param  [in]   rLen  Number of input bytes available.
 * @return  Number of values stored. The assembly writes this count to r0;
 *          with register assignment enabled p is bound to r0, so returning p
 *          returns the count.
 *          NOTE(review): without register assignment this relies on the
 *          compiler placing p in r0 - confirm for that toolchain.
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
unsigned int mlkem_thumb2_rej_uniform(sword16* p_p, unsigned int len_p,
    const byte* r_p, unsigned int rLen_p)
#else
unsigned int mlkem_thumb2_rej_uniform(sword16* p, unsigned int len,
    const byte* r, unsigned int rLen)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register sword16* p __asm__ ("r0") = (sword16*)p_p;
    register unsigned int len __asm__ ("r1") = (unsigned int)len_p;
    register const byte* r __asm__ ("r2") = (const byte*)r_p;
    register unsigned int rLen __asm__ ("r3") = (unsigned int)rLen_p;
    register word16* L_mlkem_basemul_mont_zetas_c __asm__ ("r4") =
        (word16*)&L_mlkem_basemul_mont_zetas;

#else
    register word16* L_mlkem_basemul_mont_zetas_c =
        (word16*)&L_mlkem_basemul_mont_zetas;

#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        /* r8 = 0xd01 (acceptance bound), r9 = output byte offset into p. */
        "MOV	r8, #0xd01\n\t"
        "MOV	r9, #0x0\n\t"
        "\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
    "L_mlkem_thumb2_rej_uniform_loop_no_fail:\n\t"
#else
    "L_mlkem_thumb2_rej_uniform_loop_no_fail_%=:\n\t"
#endif
        /* Branch-free loop is only used while 8 or more values are wanted. */
        "CMP	%[len], #0x8\n\t"
#if defined(__GNUC__)
        "BLT	L_mlkem_thumb2_rej_uniform_done_no_fail_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BLT.N	L_mlkem_thumb2_rej_uniform_done_no_fail\n\t"
#else
        "BLT.N	L_mlkem_thumb2_rej_uniform_done_no_fail_%=\n\t"
#endif
        /* Load 12 bytes = eight 12-bit candidates. */
        "LDM	%[r]!, {r4, r5, r6}\n\t"
        /* Candidate 0: bits 0..11 of r4. Each candidate below follows the
         * same pattern: store it, set r10 = 1 if it is less than 0xd01
         * (sign bit of candidate - 0xd01) else 0, then subtract r10 from len
         * and add 2 * r10 to the output offset. */
        "UBFX	r7, r4, #0, #12\n\t"
        "STRH	r7, [%[p], r9]\n\t"
        "SUB	r10, r7, r8\n\t"
        "LSR	r10, r10, #31\n\t"
        "SUB	%[len], %[len], r10\n\t"
        "ADD	r9, r9, r10, LSL #1\n\t"
        /* Candidate 1: bits 12..23 of r4. */
        "UBFX	r7, r4, #12, #12\n\t"
        "STRH	r7, [%[p], r9]\n\t"
        "SUB	r10, r7, r8\n\t"
        "LSR	r10, r10, #31\n\t"
        "SUB	%[len], %[len], r10\n\t"
        "ADD	r9, r9, r10, LSL #1\n\t"
        /* Candidate 2: bits 24..31 of r4 and bits 0..3 of r5. */
        "UBFX	r7, r4, #24, #8\n\t"
        "BFI	r7, r5, #8, #4\n\t"
        "STRH	r7, [%[p], r9]\n\t"
        "SUB	r10, r7, r8\n\t"
        "LSR	r10, r10, #31\n\t"
        "SUB	%[len], %[len], r10\n\t"
        "ADD	r9, r9, r10, LSL #1\n\t"
        /* Candidate 3: bits 4..15 of r5. */
        "UBFX	r7, r5, #4, #12\n\t"
        "STRH	r7, [%[p], r9]\n\t"
        "SUB	r10, r7, r8\n\t"
        "LSR	r10, r10, #31\n\t"
        "SUB	%[len], %[len], r10\n\t"
        "ADD	r9, r9, r10, LSL #1\n\t"
        /* Candidate 4: bits 16..27 of r5. */
        "UBFX	r7, r5, #16, #12\n\t"
        "STRH	r7, [%[p], r9]\n\t"
        "SUB	r10, r7, r8\n\t"
        "LSR	r10, r10, #31\n\t"
        "SUB	%[len], %[len], r10\n\t"
        "ADD	r9, r9, r10, LSL #1\n\t"
        /* Candidate 5: bits 28..31 of r5 and bits 0..7 of r6. */
        "UBFX	r7, r5, #28, #4\n\t"
        "BFI	r7, r6, #4, #8\n\t"
        "STRH	r7, [%[p], r9]\n\t"
        "SUB	r10, r7, r8\n\t"
        "LSR	r10, r10, #31\n\t"
        "SUB	%[len], %[len], r10\n\t"
        "ADD	r9, r9, r10, LSL #1\n\t"
        /* Candidate 6: bits 8..19 of r6. */
        "UBFX	r7, r6, #8, #12\n\t"
        "STRH	r7, [%[p], r9]\n\t"
        "SUB	r10, r7, r8\n\t"
        "LSR	r10, r10, #31\n\t"
        "SUB	%[len], %[len], r10\n\t"
        "ADD	r9, r9, r10, LSL #1\n\t"
        /* Candidate 7: bits 20..31 of r6. */
        "UBFX	r7, r6, #20, #12\n\t"
        "STRH	r7, [%[p], r9]\n\t"
        "SUB	r10, r7, r8\n\t"
        "LSR	r10, r10, #31\n\t"
        "SUB	%[len], %[len], r10\n\t"
        "ADD	r9, r9, r10, LSL #1\n\t"
        /* 12 input bytes consumed; keep going until rLen is exactly 0. */
        "SUBS	%[rLen], %[rLen], #0xc\n\t"
#if defined(__GNUC__)
        "BNE	L_mlkem_thumb2_rej_uniform_loop_no_fail_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BNE.N	L_mlkem_thumb2_rej_uniform_loop_no_fail\n\t"
#else
        "BNE.N	L_mlkem_thumb2_rej_uniform_loop_no_fail_%=\n\t"
#endif
        /* Input exhausted. */
#if defined(__GNUC__)
        "B	L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "B.N	L_mlkem_thumb2_rej_uniform_done\n\t"
#else
        "B.N	L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#endif
        "\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
    "L_mlkem_thumb2_rej_uniform_done_no_fail:\n\t"
#else
    "L_mlkem_thumb2_rej_uniform_done_no_fail_%=:\n\t"
#endif
        /* Fewer than 8 values wanted: finish immediately if none are. */
        "CMP	%[len], #0x0\n\t"
#if defined(__GNUC__)
        "BEQ	L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BEQ.N	L_mlkem_thumb2_rej_uniform_done\n\t"
#else
        "BEQ.N	L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#endif
        "\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
    "L_mlkem_thumb2_rej_uniform_loop:\n\t"
#else
    "L_mlkem_thumb2_rej_uniform_loop_%=:\n\t"
#endif
        /* Branching loop: a candidate is stored only when it is accepted,
         * and the loop exits as soon as len reaches 0. */
        "LDM	%[r]!, {r4, r5, r6}\n\t"
        /* Candidate 0: bits 0..11 of r4. */
        "UBFX	r7, r4, #0, #12\n\t"
        "CMP	r7, r8\n\t"
#if defined(__GNUC__)
        "BGE	L_mlkem_thumb2_rej_uniform_fail_0_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BGE.N	L_mlkem_thumb2_rej_uniform_fail_0\n\t"
#else
        "BGE.N	L_mlkem_thumb2_rej_uniform_fail_0_%=\n\t"
#endif
        "STRH	r7, [%[p], r9]\n\t"
        /* The flags from SUBS are tested by the BEQ after the ADD. */
        "SUBS	%[len], %[len], #0x1\n\t"
        "ADD	r9, r9, #0x2\n\t"
#if defined(__GNUC__)
        "BEQ	L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BEQ.N	L_mlkem_thumb2_rej_uniform_done\n\t"
#else
        "BEQ.N	L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#endif
        "\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
    "L_mlkem_thumb2_rej_uniform_fail_0:\n\t"
#else
    "L_mlkem_thumb2_rej_uniform_fail_0_%=:\n\t"
#endif
        /* Candidate 1: bits 12..23 of r4. */
        "UBFX	r7, r4, #12, #12\n\t"
        "CMP	r7, r8\n\t"
#if defined(__GNUC__)
        "BGE	L_mlkem_thumb2_rej_uniform_fail_1_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BGE.N	L_mlkem_thumb2_rej_uniform_fail_1\n\t"
#else
        "BGE.N	L_mlkem_thumb2_rej_uniform_fail_1_%=\n\t"
#endif
        "STRH	r7, [%[p], r9]\n\t"
        "SUBS	%[len], %[len], #0x1\n\t"
        "ADD	r9, r9, #0x2\n\t"
#if defined(__GNUC__)
        "BEQ	L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BEQ.N	L_mlkem_thumb2_rej_uniform_done\n\t"
#else
        "BEQ.N	L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#endif
        "\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
    "L_mlkem_thumb2_rej_uniform_fail_1:\n\t"
#else
    "L_mlkem_thumb2_rej_uniform_fail_1_%=:\n\t"
#endif
        /* Candidate 2: bits 24..31 of r4 and bits 0..3 of r5. */
        "UBFX	r7, r4, #24, #8\n\t"
        "BFI	r7, r5, #8, #4\n\t"
        "CMP	r7, r8\n\t"
#if defined(__GNUC__)
        "BGE	L_mlkem_thumb2_rej_uniform_fail_2_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BGE.N	L_mlkem_thumb2_rej_uniform_fail_2\n\t"
#else
        "BGE.N	L_mlkem_thumb2_rej_uniform_fail_2_%=\n\t"
#endif
        "STRH	r7, [%[p], r9]\n\t"
        "SUBS	%[len], %[len], #0x1\n\t"
        "ADD	r9, r9, #0x2\n\t"
#if defined(__GNUC__)
        "BEQ	L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BEQ.N	L_mlkem_thumb2_rej_uniform_done\n\t"
#else
        "BEQ.N	L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#endif
        "\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
    "L_mlkem_thumb2_rej_uniform_fail_2:\n\t"
#else
    "L_mlkem_thumb2_rej_uniform_fail_2_%=:\n\t"
#endif
        /* Candidate 3: bits 4..15 of r5. */
        "UBFX	r7, r5, #4, #12\n\t"
        "CMP	r7, r8\n\t"
#if defined(__GNUC__)
        "BGE	L_mlkem_thumb2_rej_uniform_fail_3_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BGE.N	L_mlkem_thumb2_rej_uniform_fail_3\n\t"
#else
        "BGE.N	L_mlkem_thumb2_rej_uniform_fail_3_%=\n\t"
#endif
        "STRH	r7, [%[p], r9]\n\t"
        "SUBS	%[len], %[len], #0x1\n\t"
        "ADD	r9, r9, #0x2\n\t"
#if defined(__GNUC__)
        "BEQ	L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BEQ.N	L_mlkem_thumb2_rej_uniform_done\n\t"
#else
        "BEQ.N	L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#endif
        "\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
    "L_mlkem_thumb2_rej_uniform_fail_3:\n\t"
#else
    "L_mlkem_thumb2_rej_uniform_fail_3_%=:\n\t"
#endif
        /* Candidate 4: bits 16..27 of r5. */
        "UBFX	r7, r5, #16, #12\n\t"
        "CMP	r7, r8\n\t"
#if defined(__GNUC__)
        "BGE	L_mlkem_thumb2_rej_uniform_fail_4_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BGE.N	L_mlkem_thumb2_rej_uniform_fail_4\n\t"
#else
        "BGE.N	L_mlkem_thumb2_rej_uniform_fail_4_%=\n\t"
#endif
        "STRH	r7, [%[p], r9]\n\t"
        "SUBS	%[len], %[len], #0x1\n\t"
        "ADD	r9, r9, #0x2\n\t"
#if defined(__GNUC__)
        "BEQ	L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BEQ.N	L_mlkem_thumb2_rej_uniform_done\n\t"
#else
        "BEQ.N	L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#endif
        "\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
    "L_mlkem_thumb2_rej_uniform_fail_4:\n\t"
#else
    "L_mlkem_thumb2_rej_uniform_fail_4_%=:\n\t"
#endif
        /* Candidate 5: bits 28..31 of r5 and bits 0..7 of r6. */
        "UBFX	r7, r5, #28, #4\n\t"
        "BFI	r7, r6, #4, #8\n\t"
        "CMP	r7, r8\n\t"
#if defined(__GNUC__)
        "BGE	L_mlkem_thumb2_rej_uniform_fail_5_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BGE.N	L_mlkem_thumb2_rej_uniform_fail_5\n\t"
#else
        "BGE.N	L_mlkem_thumb2_rej_uniform_fail_5_%=\n\t"
#endif
        "STRH	r7, [%[p], r9]\n\t"
        "SUBS	%[len], %[len], #0x1\n\t"
        "ADD	r9, r9, #0x2\n\t"
#if defined(__GNUC__)
        "BEQ	L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BEQ.N	L_mlkem_thumb2_rej_uniform_done\n\t"
#else
        "BEQ.N	L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#endif
        "\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
    "L_mlkem_thumb2_rej_uniform_fail_5:\n\t"
#else
    "L_mlkem_thumb2_rej_uniform_fail_5_%=:\n\t"
#endif
        /* Candidate 6: bits 8..19 of r6. */
        "UBFX	r7, r6, #8, #12\n\t"
        "CMP	r7, r8\n\t"
#if defined(__GNUC__)
        "BGE	L_mlkem_thumb2_rej_uniform_fail_6_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BGE.N	L_mlkem_thumb2_rej_uniform_fail_6\n\t"
#else
        "BGE.N	L_mlkem_thumb2_rej_uniform_fail_6_%=\n\t"
#endif
        "STRH	r7, [%[p], r9]\n\t"
        "SUBS	%[len], %[len], #0x1\n\t"
        "ADD	r9, r9, #0x2\n\t"
#if defined(__GNUC__)
        "BEQ	L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BEQ.N	L_mlkem_thumb2_rej_uniform_done\n\t"
#else
        "BEQ.N	L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#endif
        "\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
    "L_mlkem_thumb2_rej_uniform_fail_6:\n\t"
#else
    "L_mlkem_thumb2_rej_uniform_fail_6_%=:\n\t"
#endif
        /* Candidate 7: bits 20..31 of r6. */
        "UBFX	r7, r6, #20, #12\n\t"
        "CMP	r7, r8\n\t"
#if defined(__GNUC__)
        "BGE	L_mlkem_thumb2_rej_uniform_fail_7_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BGE.N	L_mlkem_thumb2_rej_uniform_fail_7\n\t"
#else
        "BGE.N	L_mlkem_thumb2_rej_uniform_fail_7_%=\n\t"
#endif
        "STRH	r7, [%[p], r9]\n\t"
        "SUBS	%[len], %[len], #0x1\n\t"
        "ADD	r9, r9, #0x2\n\t"
#if defined(__GNUC__)
        "BEQ	L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BEQ.N	L_mlkem_thumb2_rej_uniform_done\n\t"
#else
        "BEQ.N	L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#endif
        "\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
    "L_mlkem_thumb2_rej_uniform_fail_7:\n\t"
#else
    "L_mlkem_thumb2_rej_uniform_fail_7_%=:\n\t"
#endif
        /* 12 input bytes consumed; continue while input remains. */
        "SUBS	%[rLen], %[rLen], #0xc\n\t"
#if defined(__GNUC__)
        "BGT	L_mlkem_thumb2_rej_uniform_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
        "BGT.N	L_mlkem_thumb2_rej_uniform_loop\n\t"
#else
        "BGT.N	L_mlkem_thumb2_rej_uniform_loop_%=\n\t"
#endif
        "\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
    "L_mlkem_thumb2_rej_uniform_done:\n\t"
#else
    "L_mlkem_thumb2_rej_uniform_done_%=:\n\t"
#endif
        /* r0 = output byte offset / 2 = number of values stored. */
        "LSR	r0, r9, #1\n\t"
        : [p] "+r" (p), [len] "+r" (len), [r] "+r" (r), [rLen] "+r" (rLen),
          [L_mlkem_basemul_mont_zetas] "+r" (L_mlkem_basemul_mont_zetas_c)
        :
        : "memory", "cc", "r5", "r6", "r7", "r8", "r9", "r10"
    );
    /* p now holds the count written to r0 by the assembly, not a pointer. */
    return (word32)(size_t)p;
}

#endif /* WOLFSSL_WC_MLKEM */
#endif /* WOLFSSL_ARMASM_INLINE */
#endif /* WOLFSSL_ARMASM_THUMB2 */
#endif /* WOLFSSL_ARMASM */
