diff options
Diffstat (limited to 'client/wolfssl/wolfcrypt/src/port/arm/armv8-32-curve25519.c')
| -rw-r--r-- | client/wolfssl/wolfcrypt/src/port/arm/armv8-32-curve25519.c | 5581 |
1 files changed, 0 insertions, 5581 deletions
diff --git a/client/wolfssl/wolfcrypt/src/port/arm/armv8-32-curve25519.c b/client/wolfssl/wolfcrypt/src/port/arm/armv8-32-curve25519.c deleted file mode 100644 index f7ef379..0000000 --- a/client/wolfssl/wolfcrypt/src/port/arm/armv8-32-curve25519.c +++ /dev/null @@ -1,5581 +0,0 @@ -/* armv8-32-curve25519 - * - * Copyright (C) 2006-2020 wolfSSL Inc. - * - * This file is part of wolfSSL. - * - * wolfSSL is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * wolfSSL is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA - */ - -/* Generated using (from wolfssl): - * cd ../scripts - * ruby ./x25519/x25519.rb arm32 ../wolfssl/wolfcrypt/src/port/arm/armv8-32-curve25519.c - */ - -#ifndef __aarch64__ - -#include <stdint.h> -#ifdef HAVE_CONFIG_H - #include <config.h> -#endif - -#include <wolfssl/wolfcrypt/settings.h> - -#ifdef WOLFSSL_ARMASM -#include <wolfssl/wolfcrypt/fe_operations.h> -#include <stdint.h> - -void fe_init() -{ - __asm__ __volatile__ ( - "\n\t" - : - : - : "memory" - ); -} - -void fe_frombytes(fe out, const unsigned char* in) -{ - __asm__ __volatile__ ( - "ldrd r2, r3, [%[in]]\n\t" - "ldrd r12, lr, [%[in], #8]\n\t" - "ldrd r4, r5, [%[in], #16]\n\t" - "ldrd r6, r7, [%[in], #24]\n\t" - "and r7, r7, #0x7fffffff\n\t" - "strd r2, r3, [%[out]]\n\t" - "strd r12, lr, [%[out], #8]\n\t" - "strd r4, r5, [%[out], #16]\n\t" - "strd r6, r7, [%[out], #24]\n\t" - : [out] "+r" (out), [in] "+r" (in) - : - : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7" - ); -} - -void fe_tobytes(unsigned char* out, const fe n) -{ - __asm__ __volatile__ ( - "ldrd r2, r3, [%[in]]\n\t" - "ldrd r12, lr, [%[in], #8]\n\t" - "ldrd r4, r5, [%[in], #16]\n\t" - "ldrd r6, r7, [%[in], #24]\n\t" - "adds r8, r2, #19\n\t" - "adcs r8, r3, #0\n\t" - "adcs r8, r12, #0\n\t" - "adcs r8, lr, #0\n\t" - "adcs r8, r4, #0\n\t" - "adcs r8, r5, #0\n\t" - "adcs r8, r6, #0\n\t" - "adc r8, r7, #0\n\t" - "asr r8, r8, #31\n\t" - "and r8, r8, #19\n\t" - "adds r2, r2, r8\n\t" - "adcs r3, r3, #0\n\t" - "adcs r12, r12, #0\n\t" - "adcs lr, lr, #0\n\t" - "adcs r4, r4, #0\n\t" - "adcs r5, r5, #0\n\t" - "adcs r6, r6, #0\n\t" - "adc r7, r7, #0\n\t" - "and r7, r7, #0x7fffffff\n\t" - "strd r2, r3, [%[out]]\n\t" - "strd r12, lr, [%[out], #8]\n\t" - "strd r4, r5, [%[out], #16]\n\t" - "strd r6, r7, [%[out], #24]\n\t" - : [out] "+r" (out), [n] "+r" (n) - : - : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" - ); -} - -void fe_1(fe n) -{ - __asm__ __volatile__ ( - /* Set one */ - "mov r2, #1\n\t" - "mov r1, #0\n\t" - "strd r2, r1, [%[n]]\n\t" - "strd r1, r1, [%[n], #8]\n\t" - "strd r1, r1, [%[n], #16]\n\t" - "strd r1, r1, [%[n], #24]\n\t" - : [n] "+r" (n) - : - : "memory", "r1", "r2" - ); -} - -void fe_0(fe n) -{ - __asm__ __volatile__ ( - /* Set zero */ - "mov r1, #0\n\t" - "strd r1, r1, [%[n]]\n\t" - "strd r1, r1, [%[n], #8]\n\t" - "strd r1, r1, [%[n], #16]\n\t" - "strd r1, r1, [%[n], #24]\n\t" - : [n] "+r" (n) - : - : "memory", "r1" - ); -} - -void fe_copy(fe r, const fe a) -{ - __asm__ __volatile__ ( - /* Copy */ - "ldrd r2, r3, [%[a]]\n\t" - "ldrd r12, lr, [%[a], #8]\n\t" - "strd r2, r3, [%[r]]\n\t" - "strd r12, lr, [%[r], #8]\n\t" - "ldrd r2, r3, [%[a], #16]\n\t" - "ldrd r12, lr, [%[a], #24]\n\t" - "strd r2, r3, [%[r], #16]\n\t" - "strd r12, lr, [%[r], #24]\n\t" - : [r] "+r" (r), [a] "+r" (a) - : - : "memory", "r2", "r3", "r12", "lr" - ); -} - -void fe_sub(fe r, const fe a, const fe b) -{ - __asm__ __volatile__ ( - /* Sub */ - "ldrd r12, lr, [%[a]]\n\t" - "ldrd r4, r5, [%[a], #8]\n\t" - "ldrd r6, r7, [%[b]]\n\t" - "ldrd r8, r9, [%[b], #8]\n\t" - "subs r6, r12, r6\n\t" - "sbcs r7, lr, r7\n\t" - "sbcs r8, r4, r8\n\t" - "sbcs r9, r5, r9\n\t" - "strd r6, r7, [%[r]]\n\t" - "strd r8, r9, [%[r], #8]\n\t" - "ldrd r12, lr, [%[a], #16]\n\t" - "ldrd r4, r5, [%[a], #24]\n\t" - "ldrd r6, r7, [%[b], #16]\n\t" - "ldrd r8, r9, [%[b], #24]\n\t" - "sbcs r6, r12, r6\n\t" - "sbcs r7, lr, r7\n\t" - "sbcs r8, r4, r8\n\t" - "sbc r9, r5, r9\n\t" - "mov r10, #-19\n\t" - "asr r3, r9, #31\n\t" - /* Mask the modulus */ - "and r10, r3, r10\n\t" - "and r11, r3, #0x7fffffff\n\t" - /* Add modulus (if underflow) */ - "ldrd r12, lr, [%[r]]\n\t" - "ldrd r4, r5, [%[r], #8]\n\t" - "adds r12, r12, r10\n\t" - "adcs lr, lr, r3\n\t" - "adcs r4, r4, r3\n\t" - "adcs r5, r5, r3\n\t" - "adcs r6, r6, r3\n\t" - "adcs r7, r7, r3\n\t" - "adcs r8, r8, r3\n\t" - "adc r9, r9, r11\n\t" - "strd r12, lr, [%[r]]\n\t" - "strd r4, r5, [%[r], #8]\n\t" - "strd r6, r7, [%[r], #16]\n\t" - "strd r8, r9, [%[r], #24]\n\t" - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" - ); -} - -void fe_add(fe r, const fe a, const fe b) -{ - __asm__ __volatile__ ( - /* Add */ - "ldrd r12, lr, [%[a]]\n\t" - "ldrd r4, r5, [%[a], #8]\n\t" - "ldrd r6, r7, [%[b]]\n\t" - "ldrd r8, r9, [%[b], #8]\n\t" - "adds r6, r12, r6\n\t" - "adcs r7, lr, r7\n\t" - "adcs r8, r4, r8\n\t" - "adcs r9, r5, r9\n\t" - "strd r6, r7, [%[r]]\n\t" - "strd r8, r9, [%[r], #8]\n\t" - "ldrd r12, lr, [%[a], #16]\n\t" - "ldrd r4, r5, [%[a], #24]\n\t" - "ldrd r6, r7, [%[b], #16]\n\t" - "ldrd r8, r9, [%[b], #24]\n\t" - "adcs r6, r12, r6\n\t" - "adcs r7, lr, r7\n\t" - "adcs r8, r4, r8\n\t" - "adc r9, r5, r9\n\t" - "mov r10, #-19\n\t" - "asr r3, r9, #31\n\t" - /* Mask the modulus */ - "and r10, r3, r10\n\t" - "and r11, r3, #0x7fffffff\n\t" - /* Sub modulus (if overflow) */ - "ldrd r12, lr, [%[r]]\n\t" - "ldrd r4, r5, [%[r], #8]\n\t" - "subs r12, r12, r10\n\t" - "sbcs lr, lr, r3\n\t" - "sbcs r4, r4, r3\n\t" - "sbcs r5, r5, r3\n\t" - "sbcs r6, r6, r3\n\t" - "sbcs r7, r7, r3\n\t" - "sbcs r8, r8, r3\n\t" - "sbc r9, r9, r11\n\t" - "strd r12, lr, [%[r]]\n\t" - "strd r4, r5, [%[r], #8]\n\t" - "strd r6, r7, [%[r], #16]\n\t" - "strd r8, r9, [%[r], #24]\n\t" - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" - ); -} - -void fe_neg(fe r, const fe a) -{ - __asm__ __volatile__ ( - "mov r5, #-1\n\t" - "mov r4, #-19\n\t" - "ldrd r2, r3, [%[a]]\n\t" - "ldrd r12, lr, [%[a], #8]\n\t" - "subs r2, r4, r2\n\t" - "sbcs r3, r5, r3\n\t" - "sbcs r12, r5, r12\n\t" - "sbcs lr, r5, lr\n\t" - "strd r2, r3, [%[r]]\n\t" - "strd r12, lr, [%[r], #8]\n\t" - "mov r4, #0x7fffffff\n\t" - "ldrd r2, r3, [%[a], #16]\n\t" - "ldrd r12, lr, [%[a], #24]\n\t" - "sbcs r2, r5, r2\n\t" - "sbcs r3, r5, r3\n\t" - "sbcs r12, r5, r12\n\t" - "sbc lr, r4, lr\n\t" - "strd r2, r3, [%[r], #16]\n\t" - "strd r12, lr, [%[r], #24]\n\t" - : [r] "+r" (r), [a] "+r" (a) - : - : "memory", "r2", "r3", "r12", "lr", "r4", "r5" - ); -} - -int fe_isnonzero(const fe a) -{ - __asm__ __volatile__ ( - "ldrd r2, r3, [%[a]]\n\t" - "ldrd r12, lr, [%[a], #8]\n\t" - "ldrd r4, r5, [%[a], #16]\n\t" - "ldrd r6, r7, [%[a], #24]\n\t" - "adds r1, r2, #19\n\t" - "adcs r1, r3, #0\n\t" - "adcs r1, r12, #0\n\t" - "adcs r1, lr, #0\n\t" - "adcs r1, r4, #0\n\t" - "adcs r1, r5, #0\n\t" - "adcs r1, r6, #0\n\t" - "adc r1, r7, #0\n\t" - "asr r1, r1, #31\n\t" - "and r1, r1, #19\n\t" - "adds r2, r2, r1\n\t" - "adcs r3, r3, #0\n\t" - "adcs r12, r12, #0\n\t" - "adcs lr, lr, #0\n\t" - "adcs r4, r4, #0\n\t" - "adcs r5, r5, #0\n\t" - "adcs r6, r6, #0\n\t" - "adc r7, r7, #0\n\t" - "and r7, r7, #0x7fffffff\n\t" - "orr r2, r2, r3\n\t" - "orr r12, r12, lr\n\t" - "orr r4, r4, r5\n\t" - "orr r6, r6, r7\n\t" - "orr r12, r12, r4\n\t" - "orr r2, r2, r6\n\t" - "orr %[a], r2, r12\n\t" - : [a] "+r" (a) - : - : "memory", "r1", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" - ); - return (uint32_t)(size_t)a; -} - -int fe_isnegative(const fe a) -{ - __asm__ __volatile__ ( - "ldrd r2, r3, [%[a]]\n\t" - "ldrd r12, lr, [%[a], #8]\n\t" - "adds r1, r2, #19\n\t" - "adcs r1, r3, #0\n\t" - "adcs r1, r12, #0\n\t" - "adcs r1, lr, #0\n\t" - "ldrd r2, r3, [%[a], #16]\n\t" - "ldrd r12, lr, [%[a], #24]\n\t" - "adcs r1, r2, #0\n\t" - "adcs r1, r3, #0\n\t" - "adcs r1, r12, #0\n\t" - "ldr r2, [%[a]]\n\t" - "adc r1, lr, #0\n\t" - "and %[a], r2, #1\n\t" - "lsr r1, r1, #31\n\t" - "eor %[a], %[a], r1\n\t" - : [a] "+r" (a) - : - : "memory", "r1", "r2", "r3", "r12", "lr" - ); - return (uint32_t)(size_t)a; -} - -void fe_cmov_table(fe* r, fe* base, signed char b) -{ - __asm__ __volatile__ ( - "sxtb %[b], %[b]\n\t" - "sbfx r7, %[b], #7, #1\n\t" - "eor r10, %[b], r7\n\t" - "sub r10, r10, r7\n\t" - "mov r3, #1\n\t" - "mov r12, #0\n\t" - "mov lr, #1\n\t" - "mov r4, #0\n\t" - "mov r5, #0\n\t" - "mov r6, #0\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #31\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base]]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #32]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #64]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "add %[base], %[base], #0x60\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #30\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base]]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #32]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #64]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "add %[base], %[base], #0x60\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #29\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base]]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #32]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #64]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "add %[base], %[base], #0x60\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #28\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base]]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #32]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #64]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "add %[base], %[base], #0x60\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #27\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base]]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #32]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #64]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "add %[base], %[base], #0x60\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #26\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base]]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #32]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #64]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "add %[base], %[base], #0x60\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #25\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base]]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #32]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #64]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "add %[base], %[base], #0x60\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #24\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base]]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #32]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #64]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "sub %[base], %[base], #0x2a0\n\t" - "mov r8, #-19\n\t" - "mov r9, #-1\n\t" - "subs r8, r8, r5\n\t" - "sbcs r9, r9, r6\n\t" - "sbc r11, r11, r11\n\t" - "asr r10, %[b], #31\n\t" - "eor r7, r3, lr\n\t" - "and r7, r7, r10\n\t" - "eor r3, r3, r7\n\t" - "eor lr, lr, r7\n\t" - "eor r7, r12, r4\n\t" - "and r7, r7, r10\n\t" - "eor r12, r12, r7\n\t" - "eor r4, r4, r7\n\t" - "eor r8, r8, r5\n\t" - "and r8, r8, r10\n\t" - "eor r5, r5, r8\n\t" - "eor r9, r9, r6\n\t" - "and r9, r9, r10\n\t" - "eor r6, r6, r9\n\t" - "strd r3, r12, [%[r]]\n\t" - "strd lr, r4, [%[r], #32]\n\t" - "strd r5, r6, [%[r], #64]\n\t" - "sbfx r7, %[b], #7, #1\n\t" - "eor r10, %[b], r7\n\t" - "sub r10, r10, r7\n\t" - "mov r3, #0\n\t" - "mov r12, #0\n\t" - "mov lr, #0\n\t" - "mov r4, #0\n\t" - "mov r5, #0\n\t" - "mov r6, #0\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #31\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base], #8]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #40]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #72]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "add %[base], %[base], #0x60\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #30\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base], #8]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #40]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #72]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "add %[base], %[base], #0x60\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #29\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base], #8]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #40]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #72]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "add %[base], %[base], #0x60\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #28\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base], #8]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #40]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #72]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "add %[base], %[base], #0x60\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #27\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base], #8]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #40]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #72]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "add %[base], %[base], #0x60\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #26\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base], #8]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #40]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #72]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "add %[base], %[base], #0x60\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #25\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base], #8]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #40]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #72]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "add %[base], %[base], #0x60\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #24\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base], #8]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #40]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #72]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "sub %[base], %[base], #0x2a0\n\t" - "mov r8, #-1\n\t" - "mov r9, #-1\n\t" - "rsbs r11, r11, #0\n\t" - "sbcs r8, r8, r5\n\t" - "sbcs r9, r9, r6\n\t" - "sbc r11, r11, r11\n\t" - "asr r10, %[b], #31\n\t" - "eor r7, r3, lr\n\t" - "and r7, r7, r10\n\t" - "eor r3, r3, r7\n\t" - "eor lr, lr, r7\n\t" - "eor r7, r12, r4\n\t" - "and r7, r7, r10\n\t" - "eor r12, r12, r7\n\t" - "eor r4, r4, r7\n\t" - "eor r8, r8, r5\n\t" - "and r8, r8, r10\n\t" - "eor r5, r5, r8\n\t" - "eor r9, r9, r6\n\t" - "and r9, r9, r10\n\t" - "eor r6, r6, r9\n\t" - "strd r3, r12, [%[r], #8]\n\t" - "strd lr, r4, [%[r], #40]\n\t" - "strd r5, r6, [%[r], #72]\n\t" - "sbfx r7, %[b], #7, #1\n\t" - "eor r10, %[b], r7\n\t" - "sub r10, r10, r7\n\t" - "mov r3, #0\n\t" - "mov r12, #0\n\t" - "mov lr, #0\n\t" - "mov r4, #0\n\t" - "mov r5, #0\n\t" - "mov r6, #0\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #31\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base], #16]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #48]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #80]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "add %[base], %[base], #0x60\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #30\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base], #16]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #48]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #80]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "add %[base], %[base], #0x60\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #29\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base], #16]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #48]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #80]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "add %[base], %[base], #0x60\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #28\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base], #16]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #48]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #80]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "add %[base], %[base], #0x60\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #27\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base], #16]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #48]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #80]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "add %[base], %[base], #0x60\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #26\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base], #16]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #48]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #80]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "add %[base], %[base], #0x60\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #25\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base], #16]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #48]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #80]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "add %[base], %[base], #0x60\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #24\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base], #16]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #48]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #80]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "sub %[base], %[base], #0x2a0\n\t" - "mov r8, #-1\n\t" - "mov r9, #-1\n\t" - "rsbs r11, r11, #0\n\t" - "sbcs r8, r8, r5\n\t" - "sbcs r9, r9, r6\n\t" - "sbc r11, r11, r11\n\t" - "asr r10, %[b], #31\n\t" - "eor r7, r3, lr\n\t" - "and r7, r7, r10\n\t" - "eor r3, r3, r7\n\t" - "eor lr, lr, r7\n\t" - "eor r7, r12, r4\n\t" - "and r7, r7, r10\n\t" - "eor r12, r12, r7\n\t" - "eor r4, r4, r7\n\t" - "eor r8, r8, r5\n\t" - "and r8, r8, r10\n\t" - "eor r5, r5, r8\n\t" - "eor r9, r9, r6\n\t" - "and r9, r9, r10\n\t" - "eor r6, r6, r9\n\t" - "strd r3, r12, [%[r], #16]\n\t" - "strd lr, r4, [%[r], #48]\n\t" - "strd r5, r6, [%[r], #80]\n\t" - "sbfx r7, %[b], #7, #1\n\t" - "eor r10, %[b], r7\n\t" - "sub r10, r10, r7\n\t" - "mov r3, #0\n\t" - "mov r12, #0\n\t" - "mov lr, #0\n\t" - "mov r4, #0\n\t" - "mov r5, #0\n\t" - "mov r6, #0\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #31\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base], #24]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #56]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #88]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "add %[base], %[base], #0x60\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #30\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base], #24]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #56]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #88]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "add %[base], %[base], #0x60\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #29\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base], #24]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #56]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #88]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "add %[base], %[base], #0x60\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #28\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base], #24]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #56]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #88]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "add %[base], %[base], #0x60\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #27\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base], #24]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #56]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #88]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "add %[base], %[base], #0x60\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #26\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base], #24]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #56]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #88]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "add %[base], %[base], #0x60\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #25\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base], #24]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #56]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #88]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "add %[base], %[base], #0x60\n\t" - "mov r7, #0x80000000\n\t" - "ror r7, r7, #24\n\t" - "ror r7, r7, r10\n\t" - "asr r7, r7, #31\n\t" - "ldrd r8, r9, [%[base], #24]\n\t" - "eor r8, r8, r3\n\t" - "eor r9, r9, r12\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r3, r3, r8\n\t" - "eor r12, r12, r9\n\t" - "ldrd r8, r9, [%[base], #56]\n\t" - "eor r8, r8, lr\n\t" - "eor r9, r9, r4\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor lr, lr, r8\n\t" - "eor r4, r4, r9\n\t" - "ldrd r8, r9, [%[base], #88]\n\t" - "eor r8, r8, r5\n\t" - "eor r9, r9, r6\n\t" - "and r8, r8, r7\n\t" - "and r9, r9, r7\n\t" - "eor r5, r5, r8\n\t" - "eor r6, r6, r9\n\t" - "sub %[base], %[base], #0x2a0\n\t" - "mov r8, #-1\n\t" - "mov r9, #0x7fffffff\n\t" - "rsbs r11, r11, #0\n\t" - "sbcs r8, r8, r5\n\t" - "sbc r9, r9, r6\n\t" - "asr r10, %[b], #31\n\t" - "eor r7, r3, lr\n\t" - "and r7, r7, r10\n\t" - "eor r3, r3, r7\n\t" - "eor lr, lr, r7\n\t" - "eor r7, r12, r4\n\t" - "and r7, r7, r10\n\t" - "eor r12, r12, r7\n\t" - "eor r4, r4, r7\n\t" - "eor r8, r8, r5\n\t" - "and r8, r8, r10\n\t" - "eor r5, r5, r8\n\t" - "eor r9, r9, r6\n\t" - "and r9, r9, r10\n\t" - "eor r6, r6, r9\n\t" - "strd r3, r12, [%[r], #24]\n\t" - "strd lr, r4, [%[r], #56]\n\t" - "strd r5, r6, [%[r], #88]\n\t" - : [r] "+r" (r), [base] "+r" (base), [b] "+r" (b) - : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" - ); -} - -void fe_mul(fe r, const fe a, const fe b) -{ - __asm__ __volatile__ ( - "sub sp, sp, #0x40\n\t" - /* Multiply */ - "ldr r7, [%[a]]\n\t" - "ldr r8, [%[a], #4]\n\t" - "ldr r9, [%[b]]\n\t" - "ldr lr, [%[b], #4]\n\t" - /* A[0] * B[0] = 0 */ - "umull r4, r5, r7, r9\n\t" - "str r4, [sp]\n\t" - /* A[0] * B[1] = 1 */ - "umull r3, r6, r7, lr\n\t" - "adds r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - /* A[1] * B[0] = 1 */ - "umull r3, r12, r8, r9\n\t" - "adds r5, r5, r3\n\t" - "mov r4, #0\n\t" - "adcs r6, r6, r12\n\t" - "adc r4, r4, #0\n\t" - "str r5, [sp, #4]\n\t" - /* A[2] * B[0] = 2 */ - "ldr r10, [%[a], #8]\n\t" - "umull r3, r12, r10, r9\n\t" - "adds r6, r6, r3\n\t" - "adc r4, r4, r12\n\t" - /* A[1] * B[1] = 2 */ - "umull r3, r12, r8, lr\n\t" - "adds r6, r6, r3\n\t" - "mov r5, #0\n\t" - "adcs r4, r4, r12\n\t" - "adc r5, r5, #0\n\t" - /* A[0] * B[2] = 2 */ - "ldr r11, [%[b], #8]\n\t" - "umull r3, r12, r7, r11\n\t" - "adds r6, r6, r3\n\t" - "adcs r4, r4, r12\n\t" - "adc r5, r5, #0\n\t" - "str r6, [sp, #8]\n\t" - /* A[0] * B[3] = 3 */ - "ldr r11, [%[b], #12]\n\t" - "umull r3, r12, r7, r11\n\t" - "adds r4, r4, r3\n\t" - "mov r6, #0\n\t" - "adcs r5, r5, r12\n\t" - "adc r6, r6, #0\n\t" - /* A[1] * B[2] = 3 */ - "ldr r11, [%[b], #8]\n\t" - "umull r3, r12, r8, r11\n\t" - "adds r4, r4, r3\n\t" - "adcs r5, r5, r12\n\t" - "adc r6, r6, #0\n\t" - /* A[2] * B[1] = 3 */ - "umull r3, r12, r10, lr\n\t" - "adds r4, r4, r3\n\t" - "adcs r5, r5, r12\n\t" - "adc r6, r6, #0\n\t" - /* A[3] * B[0] = 3 */ - "ldr r10, [%[a], #12]\n\t" - "umull r3, r12, r10, r9\n\t" - "adds r4, r4, r3\n\t" - "adcs r5, r5, r12\n\t" - "adc r6, r6, #0\n\t" - "str r4, [sp, #12]\n\t" - /* A[4] * B[0] = 4 */ - "ldr r10, [%[a], #16]\n\t" - "umull r3, r12, r10, r9\n\t" - "adds r5, r5, r3\n\t" - "mov r4, #0\n\t" - "adcs r6, r6, r12\n\t" - "adc r4, r4, #0\n\t" - /* A[3] * B[1] = 4 */ - "ldr r10, [%[a], #12]\n\t" - "umull r3, r12, r10, lr\n\t" - "adds r5, r5, r3\n\t" - "adcs r6, r6, r12\n\t" - "adc r4, r4, #0\n\t" - /* A[2] * B[2] = 4 */ - "ldr r10, [%[a], #8]\n\t" - "umull r3, r12, r10, r11\n\t" - "adds r5, r5, r3\n\t" - "adcs r6, r6, r12\n\t" - "adc r4, r4, #0\n\t" - /* A[1] * B[3] = 4 */ - "ldr r11, [%[b], #12]\n\t" - "umull r3, r12, r8, r11\n\t" - "adds r5, r5, r3\n\t" - "adcs r6, r6, r12\n\t" - "adc r4, r4, #0\n\t" - /* A[0] * B[4] = 4 */ - "ldr r11, [%[b], #16]\n\t" - "umull r3, r12, r7, r11\n\t" - "adds r5, r5, r3\n\t" - "adcs r6, r6, r12\n\t" - "adc r4, r4, #0\n\t" - "str r5, [sp, #16]\n\t" - /* A[0] * B[5] = 5 */ - "ldr r11, [%[b], #20]\n\t" - "umull r3, r12, r7, r11\n\t" - "adds r6, r6, r3\n\t" - "mov r5, #0\n\t" - "adcs r4, r4, r12\n\t" - "adc r5, r5, #0\n\t" - /* A[1] * B[4] = 5 */ - "ldr r11, [%[b], #16]\n\t" - "umull r3, r12, r8, r11\n\t" - "adds r6, r6, r3\n\t" - "adcs r4, r4, r12\n\t" - "adc r5, r5, #0\n\t" - /* A[2] * B[3] = 5 */ - "ldr r11, [%[b], #12]\n\t" - "umull r3, r12, r10, r11\n\t" - "adds r6, r6, r3\n\t" - "adcs r4, r4, r12\n\t" - "adc r5, r5, #0\n\t" - /* A[3] * B[2] = 5 */ - "ldr r10, [%[a], #12]\n\t" - "ldr r11, [%[b], #8]\n\t" - "umull r3, r12, r10, r11\n\t" - "adds r6, r6, r3\n\t" - "adcs r4, r4, r12\n\t" - "adc r5, r5, #0\n\t" - /* A[4] * B[1] = 5 */ - "ldr r10, [%[a], #16]\n\t" - "umull r3, r12, r10, lr\n\t" - "adds r6, r6, r3\n\t" - "adcs r4, r4, r12\n\t" - "adc r5, r5, #0\n\t" - /* A[5] * B[0] = 5 */ - "ldr r10, [%[a], #20]\n\t" - "umull r3, r12, r10, r9\n\t" - "adds r6, r6, r3\n\t" - "adcs r4, r4, r12\n\t" - "adc r5, r5, #0\n\t" - "str r6, [sp, #20]\n\t" - /* A[6] * B[0] = 6 */ - "ldr r10, [%[a], #24]\n\t" - "umull r3, r12, r10, r9\n\t" - "adds r4, r4, r3\n\t" - "mov r6, #0\n\t" - "adcs r5, r5, r12\n\t" - "adc r6, r6, #0\n\t" - /* A[5] * B[1] = 6 */ - "ldr r10, [%[a], #20]\n\t" - "umull r3, r12, r10, lr\n\t" - "adds r4, r4, r3\n\t" - "adcs r5, r5, r12\n\t" - "adc r6, r6, #0\n\t" - /* A[4] * B[2] = 6 */ - "ldr r10, [%[a], #16]\n\t" - "umull r3, r12, r10, r11\n\t" - "adds r4, r4, r3\n\t" - "adcs r5, r5, r12\n\t" - "adc r6, r6, #0\n\t" - /* A[3] * B[3] = 6 */ - "ldr r10, [%[a], #12]\n\t" - "ldr r11, [%[b], #12]\n\t" - "umull r3, r12, r10, r11\n\t" - "adds r4, r4, r3\n\t" - "adcs r5, r5, r12\n\t" - "adc r6, r6, #0\n\t" - /* A[2] * B[4] = 6 */ - "ldr r10, [%[a], #8]\n\t" - "ldr r11, [%[b], #16]\n\t" - "umull r3, r12, r10, r11\n\t" - "adds r4, r4, r3\n\t" - "adcs r5, r5, r12\n\t" - "adc r6, r6, #0\n\t" - /* A[1] * B[5] = 6 */ - "ldr r11, [%[b], #20]\n\t" - "umull r3, r12, r8, r11\n\t" - "adds r4, r4, r3\n\t" - "adcs r5, r5, r12\n\t" - "adc r6, r6, #0\n\t" - /* A[0] * B[6] = 6 */ - "ldr r11, [%[b], #24]\n\t" - "umull r3, r12, r7, r11\n\t" - "adds r4, r4, r3\n\t" - "adcs r5, r5, r12\n\t" - "adc r6, r6, #0\n\t" - "str r4, [sp, #24]\n\t" - /* A[0] * B[7] = 7 */ - "ldr r11, [%[b], #28]\n\t" - "umull r3, r12, r7, r11\n\t" - "adds r5, r5, r3\n\t" - "mov r4, #0\n\t" - "adcs r6, r6, r12\n\t" - "adc r4, r4, #0\n\t" - /* A[1] * B[6] = 7 */ - "ldr r11, [%[b], #24]\n\t" - "umull r3, r12, r8, r11\n\t" - "adds r5, r5, r3\n\t" - "adcs r6, r6, r12\n\t" - "adc r4, r4, #0\n\t" - /* A[2] * B[5] = 7 */ - "ldr r11, [%[b], #20]\n\t" - "umull r3, r12, r10, r11\n\t" - "adds r5, r5, r3\n\t" - "adcs r6, r6, r12\n\t" - "adc r4, r4, #0\n\t" - /* A[3] * B[4] = 7 */ - "ldr r10, [%[a], #12]\n\t" - "ldr r11, [%[b], #16]\n\t" - "umull r3, r12, r10, r11\n\t" - "adds r5, r5, r3\n\t" - "adcs r6, r6, r12\n\t" - "adc r4, r4, #0\n\t" - /* A[4] * B[3] = 7 */ - "ldr r10, [%[a], #16]\n\t" - "ldr r11, [%[b], #12]\n\t" - "umull r3, r12, r10, r11\n\t" - "adds r5, r5, r3\n\t" - "adcs r6, r6, r12\n\t" - "adc r4, r4, #0\n\t" - /* A[5] * B[2] = 7 */ - "ldr r10, [%[a], #20]\n\t" - "ldr r11, [%[b], #8]\n\t" - "umull r3, r12, r10, r11\n\t" - "adds r5, r5, r3\n\t" - "adcs r6, r6, r12\n\t" - "adc r4, r4, #0\n\t" - /* A[6] * B[1] = 7 */ - "ldr r10, [%[a], #24]\n\t" - "umull r3, r12, r10, lr\n\t" - "adds r5, r5, r3\n\t" - "adcs r6, r6, r12\n\t" - "adc r4, r4, #0\n\t" - /* A[7] * B[0] = 7 */ - "ldr r10, [%[a], #28]\n\t" - "umull r3, r12, r10, r9\n\t" - "adds r5, r5, r3\n\t" - "adcs r6, r6, r12\n\t" - "adc r4, r4, #0\n\t" - "str r5, [sp, #28]\n\t" - "ldr r7, [%[a], #24]\n\t" - "ldr r9, [%[b], #24]\n\t" - /* A[7] * B[1] = 8 */ - "umull r3, r12, r10, lr\n\t" - "adds r6, r6, r3\n\t" - "mov r5, #0\n\t" - "adcs r4, r4, r12\n\t" - "adc r5, r5, #0\n\t" - /* A[6] * B[2] = 8 */ - "umull r3, r12, r7, r11\n\t" - "adds r6, r6, r3\n\t" - "adcs r4, r4, r12\n\t" - "adc r5, r5, #0\n\t" - /* A[5] * B[3] = 8 */ - "ldr r10, [%[a], #20]\n\t" - "ldr r11, [%[b], #12]\n\t" - "umull r3, r12, r10, r11\n\t" - "adds r6, r6, r3\n\t" - "adcs r4, r4, r12\n\t" - "adc r5, r5, #0\n\t" - /* A[4] * B[4] = 8 */ - "ldr r10, [%[a], #16]\n\t" - "ldr r11, [%[b], #16]\n\t" - "umull r3, r12, r10, r11\n\t" - "adds r6, r6, r3\n\t" - "adcs r4, r4, r12\n\t" - "adc r5, r5, #0\n\t" - /* A[3] * B[5] = 8 */ - "ldr r10, [%[a], #12]\n\t" - "ldr r11, [%[b], #20]\n\t" - "umull r3, r12, r10, r11\n\t" - "adds r6, r6, r3\n\t" - "adcs r4, r4, r12\n\t" - "adc r5, r5, #0\n\t" - /* A[2] * B[6] = 8 */ - "ldr r10, [%[a], #8]\n\t" - "umull r3, r12, r10, r9\n\t" - "adds r6, r6, r3\n\t" - "adcs r4, r4, r12\n\t" - "adc r5, r5, #0\n\t" - /* A[1] * B[7] = 8 */ - "ldr r11, [%[b], #28]\n\t" - "umull r3, r12, r8, r11\n\t" - "adds r6, r6, r3\n\t" - "adcs r4, r4, r12\n\t" - "adc r5, r5, #0\n\t" - "str r6, [sp, #32]\n\t" - "ldr r8, [%[a], #28]\n\t" - "mov lr, r11\n\t" - /* A[2] * B[7] = 9 */ - "umull r3, r12, r10, lr\n\t" - "adds r4, r4, r3\n\t" - "mov r6, #0\n\t" - "adcs r5, r5, r12\n\t" - "adc r6, r6, #0\n\t" - /* A[3] * B[6] = 9 */ - "ldr r10, [%[a], #12]\n\t" - "umull r3, r12, r10, r9\n\t" - "adds r4, r4, r3\n\t" - "adcs r5, r5, r12\n\t" - "adc r6, r6, #0\n\t" - /* A[4] * B[5] = 9 */ - "ldr r10, [%[a], #16]\n\t" - "ldr r11, [%[b], #20]\n\t" - "umull r3, r12, r10, r11\n\t" - "adds r4, r4, r3\n\t" - "adcs r5, r5, r12\n\t" - "adc r6, r6, #0\n\t" - /* A[5] * B[4] = 9 */ - "ldr r10, [%[a], #20]\n\t" - "ldr r11, [%[b], #16]\n\t" - "umull r3, r12, r10, r11\n\t" - "adds r4, r4, r3\n\t" - "adcs r5, r5, r12\n\t" - "adc r6, r6, #0\n\t" - /* A[6] * B[3] = 9 */ - "ldr r11, [%[b], #12]\n\t" - "umull r3, r12, r7, r11\n\t" - "adds r4, r4, r3\n\t" - "adcs r5, r5, r12\n\t" - "adc r6, r6, #0\n\t" - /* A[7] * B[2] = 9 */ - "ldr r11, [%[b], #8]\n\t" - "umull r3, r12, r8, r11\n\t" - "adds r4, r4, r3\n\t" - "adcs r5, r5, r12\n\t" - "adc r6, r6, #0\n\t" - "str r4, [sp, #36]\n\t" - /* A[7] * B[3] = 10 */ - "ldr r11, [%[b], #12]\n\t" - "umull r3, r12, r8, r11\n\t" - "adds r5, r5, r3\n\t" - "mov r4, #0\n\t" - "adcs r6, r6, r12\n\t" - "adc r4, r4, #0\n\t" - /* A[6] * B[4] = 10 */ - "ldr r11, [%[b], #16]\n\t" - "umull r3, r12, r7, r11\n\t" - "adds r5, r5, r3\n\t" - "adcs r6, r6, r12\n\t" - "adc r4, r4, #0\n\t" - /* A[5] * B[5] = 10 */ - "ldr r11, [%[b], #20]\n\t" - "umull r3, r12, r10, r11\n\t" - "adds r5, r5, r3\n\t" - "adcs r6, r6, r12\n\t" - "adc r4, r4, #0\n\t" - /* A[4] * B[6] = 10 */ - "ldr r10, [%[a], #16]\n\t" - "umull r3, r12, r10, r9\n\t" - "adds r5, r5, r3\n\t" - "adcs r6, r6, r12\n\t" - "adc r4, r4, #0\n\t" - /* A[3] * B[7] = 10 */ - "ldr r10, [%[a], #12]\n\t" - "umull r3, r12, r10, lr\n\t" - "adds r5, r5, r3\n\t" - "adcs r6, r6, r12\n\t" - "adc r4, r4, #0\n\t" - "str r5, [sp, #40]\n\t" - /* A[4] * B[7] = 11 */ - "ldr r10, [%[a], #16]\n\t" - "umull r3, r12, r10, lr\n\t" - "adds r6, r6, r3\n\t" - "mov r5, #0\n\t" - "adcs r4, r4, r12\n\t" - "adc r5, r5, #0\n\t" - /* A[5] * B[6] = 11 */ - "ldr r10, [%[a], #20]\n\t" - "umull r3, r12, r10, r9\n\t" - "adds r6, r6, r3\n\t" - "adcs r4, r4, r12\n\t" - "adc r5, r5, #0\n\t" - /* A[6] * B[5] = 11 */ - "umull r3, r12, r7, r11\n\t" - "adds r6, r6, r3\n\t" - "adcs r4, r4, r12\n\t" - "adc r5, r5, #0\n\t" - /* A[7] * B[4] = 11 */ - "ldr r11, [%[b], #16]\n\t" - "umull r3, r12, r8, r11\n\t" - "adds r6, r6, r3\n\t" - "adcs r4, r4, r12\n\t" - "adc r5, r5, #0\n\t" - "str r6, [sp, #44]\n\t" - /* A[7] * B[5] = 12 */ - "ldr r11, [%[b], #20]\n\t" - "umull r3, r12, r8, r11\n\t" - "adds r4, r4, r3\n\t" - "mov r6, #0\n\t" - "adcs r5, r5, r12\n\t" - "adc r6, r6, #0\n\t" - /* A[6] * B[6] = 12 */ - "umull r3, r12, r7, r9\n\t" - "adds r4, r4, r3\n\t" - "adcs r5, r5, r12\n\t" - "adc r6, r6, #0\n\t" - /* A[5] * B[7] = 12 */ - "umull r3, r12, r10, lr\n\t" - "adds r4, r4, r3\n\t" - "adcs r5, r5, r12\n\t" - "adc r6, r6, #0\n\t" - "str r4, [sp, #48]\n\t" - /* A[6] * B[7] = 13 */ - "umull r3, r12, r7, lr\n\t" - "adds r5, r5, r3\n\t" - "mov r4, #0\n\t" - "adcs r6, r6, r12\n\t" - "adc r4, r4, #0\n\t" - /* A[7] * B[6] = 13 */ - "umull r3, r12, r8, r9\n\t" - "adds r5, r5, r3\n\t" - "adcs r6, r6, r12\n\t" - "adc r4, r4, #0\n\t" - "str r5, [sp, #52]\n\t" - /* A[7] * B[7] = 14 */ - "umull r3, r12, r8, lr\n\t" - "adds r6, r6, r3\n\t" - "adc r4, r4, r12\n\t" - "str r6, [sp, #56]\n\t" - "str r4, [sp, #60]\n\t" - /* Reduce */ - /* Load bottom half */ - "ldrd r4, r5, [sp]\n\t" - "ldrd r6, r7, [sp, #8]\n\t" - "ldrd r8, r9, [sp, #16]\n\t" - "ldrd r10, r11, [sp, #24]\n\t" - "lsr r3, r11, #31\n\t" - "and r11, r11, #0x7fffffff\n\t" - "mov lr, #19\n\t" - "ldr %[a], [sp, #32]\n\t" - "orr r3, r3, %[a], lsl #1\n\t" - "umull r3, r12, lr, r3\n\t" - "adds r4, r4, r3\n\t" - "mov %[b], #0\n\t" - "adcs r5, r5, r12\n\t" - "adc %[b], %[b], #0\n\t" - "lsr r3, %[a], #31\n\t" - "ldr %[a], [sp, #36]\n\t" - "orr r3, r3, %[a], lsl #1\n\t" - "umull r3, r12, lr, r3\n\t" - "add r12, r12, %[b]\n\t" - "adds r5, r5, r3\n\t" - "mov %[b], #0\n\t" - "adcs r6, r6, r12\n\t" - "adc %[b], %[b], #0\n\t" - "lsr r3, %[a], #31\n\t" - "ldr %[a], [sp, #40]\n\t" - "orr r3, r3, %[a], lsl #1\n\t" - "umull r3, r12, lr, r3\n\t" - "add r12, r12, %[b]\n\t" - "adds r6, r6, r3\n\t" - "mov %[b], #0\n\t" - "adcs r7, r7, r12\n\t" - "adc %[b], %[b], #0\n\t" - "lsr r3, %[a], #31\n\t" - "ldr %[a], [sp, #44]\n\t" - "orr r3, r3, %[a], lsl #1\n\t" - "umull r3, r12, lr, r3\n\t" - "add r12, r12, %[b]\n\t" - "adds r7, r7, r3\n\t" - "mov %[b], #0\n\t" - "adcs r8, r8, r12\n\t" - "adc %[b], %[b], #0\n\t" - "lsr r3, %[a], #31\n\t" - "ldr %[a], [sp, #48]\n\t" - "orr r3, r3, %[a], lsl #1\n\t" - "umull r3, r12, lr, r3\n\t" - "add r12, r12, %[b]\n\t" - "adds r8, r8, r3\n\t" - "mov %[b], #0\n\t" - "adcs r9, r9, r12\n\t" - "adc %[b], %[b], #0\n\t" - "lsr r3, %[a], #31\n\t" - "ldr %[a], [sp, #52]\n\t" - "orr r3, r3, %[a], lsl #1\n\t" - "umull r3, r12, lr, r3\n\t" - "add r12, r12, %[b]\n\t" - "adds r9, r9, r3\n\t" - "mov %[b], #0\n\t" - "adcs r10, r10, r12\n\t" - "adc %[b], %[b], #0\n\t" - "lsr r3, %[a], #31\n\t" - "ldr %[a], [sp, #56]\n\t" - "orr r3, r3, %[a], lsl #1\n\t" - "umull r3, r12, lr, r3\n\t" - "add r12, r12, %[b]\n\t" - "adds r10, r10, r3\n\t" - "mov %[b], #0\n\t" - "adcs r11, r11, r12\n\t" - "adc %[b], %[b], #0\n\t" - "lsr r3, %[a], #31\n\t" - "ldr %[a], [sp, #60]\n\t" - "orr r3, r3, %[a], lsl #1\n\t" - "umull r3, r12, lr, r3\n\t" - "adds r11, r11, r3\n\t" - "adc r3, r12, %[b]\n\t" - /* Overflow */ - "lsl r3, r3, #1\n\t" - "orr r3, r3, r11, lsr #31\n\t" - "mul r3, r3, lr\n\t" - "and r11, r11, #0x7fffffff\n\t" - "adds r4, r4, r3\n\t" - "adcs r5, r5, #0\n\t" - "adcs r6, r6, #0\n\t" - "adcs r7, r7, #0\n\t" - "adcs r8, r8, #0\n\t" - "adcs r9, r9, #0\n\t" - "adcs r10, r10, #0\n\t" - "adc r11, r11, #0\n\t" - /* Reduce if top bit set */ - "asr r3, r11, #31\n\t" - "and r3, r3, lr\n\t" - "and r11, r11, #0x7fffffff\n\t" - "adds r4, r4, r3\n\t" - "adcs r5, r5, #0\n\t" - "adcs r6, r6, #0\n\t" - "adcs r7, r7, #0\n\t" - "adcs r8, r8, #0\n\t" - "adcs r9, r9, #0\n\t" - "adcs r10, r10, #0\n\t" - "adc r11, r11, #0\n\t" - /* Store */ - "strd r4, r5, [%[r]]\n\t" - "strd r6, r7, [%[r], #8]\n\t" - "strd r8, r9, [%[r], #16]\n\t" - "strd r10, r11, [%[r], #24]\n\t" - "add sp, sp, #0x40\n\t" - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" - ); -} - -void fe_sq(fe r, const fe a) -{ - __asm__ __volatile__ ( - "sub sp, sp, #0x40\n\t" - /* Square */ - "ldr r7, [%[a]]\n\t" - "ldr r8, [%[a], #4]\n\t" - "ldr r9, [%[a], #8]\n\t" - "ldr r10, [%[a], #12]\n\t" - "ldr r12, [%[a], #16]\n\t" - /* A[0] * A[0] = 0 */ - "umull r4, r5, r7, r7\n\t" - "str r4, [sp]\n\t" - /* A[0] * A[1] = 1 */ - "umull r2, r3, r7, r8\n\t" - "mov r6, #0\n\t" - "adds r5, r5, r2\n\t" - "adc r6, r6, r3\n\t" - "adds r5, r5, r2\n\t" - "mov r4, #0\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - "str r5, [sp, #4]\n\t" - /* A[1] * A[1] = 2 */ - "umull r2, r3, r8, r8\n\t" - "adds r6, r6, r2\n\t" - "adc r4, r4, r3\n\t" - /* A[0] * A[2] = 2 */ - "umull r2, r3, r7, r9\n\t" - "adds r6, r6, r2\n\t" - "mov r5, #0\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - "adds r6, r6, r2\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - "str r6, [sp, #8]\n\t" - /* A[0] * A[3] = 3 */ - "umull r2, r3, r7, r10\n\t" - "adds r4, r4, r2\n\t" - "adc r5, r5, r3\n\t" - "adds r4, r4, r2\n\t" - "mov r6, #0\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - /* A[1] * A[2] = 3 */ - "umull r2, r3, r8, r9\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - "str r4, [sp, #12]\n\t" - /* A[2] * A[2] = 4 */ - "umull r2, r3, r9, r9\n\t" - "adds r5, r5, r2\n\t" - "mov r4, #0\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - /* A[1] * A[3] = 4 */ - "umull r2, r3, r8, r10\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - /* A[0] * A[4] = 4 */ - "umull r2, r3, r7, r12\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - "str r5, [sp, #16]\n\t" - /* A[0] * A[5] = 5 */ - "ldr r11, [%[a], #20]\n\t" - "umull r2, r3, r7, r11\n\t" - "adds r6, r6, r2\n\t" - "mov r5, #0\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - "adds r6, r6, r2\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - /* A[1] * A[4] = 5 */ - "umull r2, r3, r8, r12\n\t" - "adds r6, r6, r2\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - "adds r6, r6, r2\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - /* A[2] * A[3] = 5 */ - "umull r2, r3, r9, r10\n\t" - "adds r6, r6, r2\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - "adds r6, r6, r2\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - "str r6, [sp, #20]\n\t" - /* A[3] * A[3] = 6 */ - "umull r2, r3, r10, r10\n\t" - "adds r4, r4, r2\n\t" - "mov r6, #0\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - /* A[2] * A[4] = 6 */ - "umull r2, r3, r9, r12\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - /* A[1] * A[5] = 6 */ - "umull r2, r3, r8, r11\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - /* A[0] * A[6] = 6 */ - "ldr r11, [%[a], #24]\n\t" - "umull r2, r3, r7, r11\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - "str r4, [sp, #24]\n\t" - /* A[0] * A[7] = 7 */ - "ldr r11, [%[a], #28]\n\t" - "umull r2, r3, r7, r11\n\t" - "adds r5, r5, r2\n\t" - "mov r4, #0\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - /* A[1] * A[6] = 7 */ - "ldr r11, [%[a], #24]\n\t" - "umull r2, r3, r8, r11\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - /* A[2] * A[5] = 7 */ - "ldr r11, [%[a], #20]\n\t" - "umull r2, r3, r9, r11\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - /* A[3] * A[4] = 7 */ - "umull r2, r3, r10, r12\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - "str r5, [sp, #28]\n\t" - /* A[4] * A[4] = 8 */ - "umull r2, r3, r12, r12\n\t" - "adds r6, r6, r2\n\t" - "mov r5, #0\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - /* A[3] * A[5] = 8 */ - "umull r2, r3, r10, r11\n\t" - "adds r6, r6, r2\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - "adds r6, r6, r2\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - /* A[2] * A[6] = 8 */ - "ldr r11, [%[a], #24]\n\t" - "umull r2, r3, r9, r11\n\t" - "adds r6, r6, r2\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - "adds r6, r6, r2\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - /* A[1] * A[7] = 8 */ - "ldr r11, [%[a], #28]\n\t" - "umull r2, r3, r8, r11\n\t" - "adds r6, r6, r2\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - "adds r6, r6, r2\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - "str r6, [sp, #32]\n\t" - "ldr r7, [%[a], #20]\n\t" - /* A[2] * A[7] = 9 */ - "umull r2, r3, r9, r11\n\t" - "adds r4, r4, r2\n\t" - "mov r6, #0\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - /* A[3] * A[6] = 9 */ - "ldr r11, [%[a], #24]\n\t" - "umull r2, r3, r10, r11\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - /* A[4] * A[5] = 9 */ - "umull r2, r3, r12, r7\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - "str r4, [sp, #36]\n\t" - "mov r8, r11\n\t" - /* A[5] * A[5] = 10 */ - "umull r2, r3, r7, r7\n\t" - "adds r5, r5, r2\n\t" - "mov r4, #0\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - /* A[4] * A[6] = 10 */ - "umull r2, r3, r12, r8\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - /* A[3] * A[7] = 10 */ - "ldr r11, [%[a], #28]\n\t" - "umull r2, r3, r10, r11\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - "str r5, [sp, #40]\n\t" - "mov r9, r11\n\t" - /* A[4] * A[7] = 11 */ - "umull r2, r3, r12, r9\n\t" - "adds r6, r6, r2\n\t" - "mov r5, #0\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - "adds r6, r6, r2\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - /* A[5] * A[6] = 11 */ - "umull r2, r3, r7, r8\n\t" - "adds r6, r6, r2\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - "adds r6, r6, r2\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - "str r6, [sp, #44]\n\t" - /* A[6] * A[6] = 12 */ - "umull r2, r3, r8, r8\n\t" - "adds r4, r4, r2\n\t" - "mov r6, #0\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - /* A[5] * A[7] = 12 */ - "umull r2, r3, r7, r9\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - "str r4, [sp, #48]\n\t" - /* A[6] * A[7] = 13 */ - "umull r2, r3, r8, r9\n\t" - "adds r5, r5, r2\n\t" - "mov r4, #0\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - "str r5, [sp, #52]\n\t" - /* A[7] * A[7] = 14 */ - "umull r2, r3, r9, r9\n\t" - "adds r6, r6, r2\n\t" - "adc r4, r4, r3\n\t" - "str r6, [sp, #56]\n\t" - "str r4, [sp, #60]\n\t" - /* Reduce */ - /* Load bottom half */ - "ldrd r4, r5, [sp]\n\t" - "ldrd r6, r7, [sp, #8]\n\t" - "ldrd r8, r9, [sp, #16]\n\t" - "ldrd r10, r11, [sp, #24]\n\t" - "lsr r2, r11, #31\n\t" - "and r11, r11, #0x7fffffff\n\t" - "mov r12, #19\n\t" - "ldr %[a], [sp, #32]\n\t" - "orr r2, r2, %[a], lsl #1\n\t" - "umull r2, r3, r12, r2\n\t" - "adds r4, r4, r2\n\t" - "mov lr, #0\n\t" - "adcs r5, r5, r3\n\t" - "adc lr, lr, #0\n\t" - "lsr r2, %[a], #31\n\t" - "ldr %[a], [sp, #36]\n\t" - "orr r2, r2, %[a], lsl #1\n\t" - "umull r2, r3, r12, r2\n\t" - "add r3, r3, lr\n\t" - "adds r5, r5, r2\n\t" - "mov lr, #0\n\t" - "adcs r6, r6, r3\n\t" - "adc lr, lr, #0\n\t" - "lsr r2, %[a], #31\n\t" - "ldr %[a], [sp, #40]\n\t" - "orr r2, r2, %[a], lsl #1\n\t" - "umull r2, r3, r12, r2\n\t" - "add r3, r3, lr\n\t" - "adds r6, r6, r2\n\t" - "mov lr, #0\n\t" - "adcs r7, r7, r3\n\t" - "adc lr, lr, #0\n\t" - "lsr r2, %[a], #31\n\t" - "ldr %[a], [sp, #44]\n\t" - "orr r2, r2, %[a], lsl #1\n\t" - "umull r2, r3, r12, r2\n\t" - "add r3, r3, lr\n\t" - "adds r7, r7, r2\n\t" - "mov lr, #0\n\t" - "adcs r8, r8, r3\n\t" - "adc lr, lr, #0\n\t" - "lsr r2, %[a], #31\n\t" - "ldr %[a], [sp, #48]\n\t" - "orr r2, r2, %[a], lsl #1\n\t" - "umull r2, r3, r12, r2\n\t" - "add r3, r3, lr\n\t" - "adds r8, r8, r2\n\t" - "mov lr, #0\n\t" - "adcs r9, r9, r3\n\t" - "adc lr, lr, #0\n\t" - "lsr r2, %[a], #31\n\t" - "ldr %[a], [sp, #52]\n\t" - "orr r2, r2, %[a], lsl #1\n\t" - "umull r2, r3, r12, r2\n\t" - "add r3, r3, lr\n\t" - "adds r9, r9, r2\n\t" - "mov lr, #0\n\t" - "adcs r10, r10, r3\n\t" - "adc lr, lr, #0\n\t" - "lsr r2, %[a], #31\n\t" - "ldr %[a], [sp, #56]\n\t" - "orr r2, r2, %[a], lsl #1\n\t" - "umull r2, r3, r12, r2\n\t" - "add r3, r3, lr\n\t" - "adds r10, r10, r2\n\t" - "mov lr, #0\n\t" - "adcs r11, r11, r3\n\t" - "adc lr, lr, #0\n\t" - "lsr r2, %[a], #31\n\t" - "ldr %[a], [sp, #60]\n\t" - "orr r2, r2, %[a], lsl #1\n\t" - "umull r2, r3, r12, r2\n\t" - "adds r11, r11, r2\n\t" - "adc r2, r3, lr\n\t" - /* Overflow */ - "lsl r2, r2, #1\n\t" - "orr r2, r2, r11, lsr #31\n\t" - "mul r2, r2, r12\n\t" - "and r11, r11, #0x7fffffff\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, #0\n\t" - "adcs r6, r6, #0\n\t" - "adcs r7, r7, #0\n\t" - "adcs r8, r8, #0\n\t" - "adcs r9, r9, #0\n\t" - "adcs r10, r10, #0\n\t" - "adc r11, r11, #0\n\t" - /* Reduce if top bit set */ - "asr r2, r11, #31\n\t" - "and r2, r2, r12\n\t" - "and r11, r11, #0x7fffffff\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, #0\n\t" - "adcs r6, r6, #0\n\t" - "adcs r7, r7, #0\n\t" - "adcs r8, r8, #0\n\t" - "adcs r9, r9, #0\n\t" - "adcs r10, r10, #0\n\t" - "adc r11, r11, #0\n\t" - /* Store */ - "strd r4, r5, [%[r]]\n\t" - "strd r6, r7, [%[r], #8]\n\t" - "strd r8, r9, [%[r], #16]\n\t" - "strd r10, r11, [%[r], #24]\n\t" - "add sp, sp, #0x40\n\t" - : [r] "+r" (r), [a] "+r" (a) - : - : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" - ); -} - -void fe_mul121666(fe r, fe a) -{ - __asm__ __volatile__ ( - /* Multiply by 121666 */ - "ldrd r2, r3, [%[a]]\n\t" - "ldrd r4, r5, [%[a], #8]\n\t" - "ldrd r6, r7, [%[a], #16]\n\t" - "ldrd r8, r9, [%[a], #24]\n\t" - "movw lr, #0xdb42\n\t" - "movt lr, #1\n\t" - "umull r2, r10, r2, lr\n\t" - "umull r3, r12, r3, lr\n\t" - "adds r3, r3, r10\n\t" - "adc r10, r12, #0\n\t" - "umull r4, r12, r4, lr\n\t" - "adds r4, r4, r10\n\t" - "adc r10, r12, #0\n\t" - "umull r5, r12, r5, lr\n\t" - "adds r5, r5, r10\n\t" - "adc r10, r12, #0\n\t" - "umull r6, r12, r6, lr\n\t" - "adds r6, r6, r10\n\t" - "adc r10, r12, #0\n\t" - "umull r7, r12, r7, lr\n\t" - "adds r7, r7, r10\n\t" - "adc r10, r12, #0\n\t" - "umull r8, r12, r8, lr\n\t" - "adds r8, r8, r10\n\t" - "adc r10, r12, #0\n\t" - "umull r9, r12, r9, lr\n\t" - "adds r9, r9, r10\n\t" - "adc r10, r12, #0\n\t" - "mov lr, #19\n\t" - "lsl r10, r10, #1\n\t" - "orr r10, r10, r9, lsr #31\n\t" - "mul r10, r10, lr\n\t" - "and r9, r9, #0x7fffffff\n\t" - "adds r2, r2, r10\n\t" - "adcs r3, r3, #0\n\t" - "adcs r4, r4, #0\n\t" - "adcs r5, r5, #0\n\t" - "adcs r6, r6, #0\n\t" - "adcs r7, r7, #0\n\t" - "adcs r8, r8, #0\n\t" - "adc r9, r9, #0\n\t" - "strd r2, r3, [%[r]]\n\t" - "strd r4, r5, [%[r], #8]\n\t" - "strd r6, r7, [%[r], #16]\n\t" - "strd r8, r9, [%[r], #24]\n\t" - : [r] "+r" (r), [a] "+r" (a) - : - : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10" - ); -} - -void fe_sq2(fe r, const fe a) -{ - __asm__ __volatile__ ( - "sub sp, sp, #0x40\n\t" - /* Square * 2 */ - "ldr r7, [%[a]]\n\t" - "ldr r8, [%[a], #4]\n\t" - "ldr r9, [%[a], #8]\n\t" - "ldr r10, [%[a], #12]\n\t" - "ldr r12, [%[a], #16]\n\t" - /* A[0] * A[0] = 0 */ - "umull r4, r5, r7, r7\n\t" - "str r4, [sp]\n\t" - /* A[0] * A[1] = 1 */ - "umull r2, r3, r7, r8\n\t" - "mov r6, #0\n\t" - "adds r5, r5, r2\n\t" - "adc r6, r6, r3\n\t" - "adds r5, r5, r2\n\t" - "mov r4, #0\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - "str r5, [sp, #4]\n\t" - /* A[1] * A[1] = 2 */ - "umull r2, r3, r8, r8\n\t" - "adds r6, r6, r2\n\t" - "adc r4, r4, r3\n\t" - /* A[0] * A[2] = 2 */ - "umull r2, r3, r7, r9\n\t" - "adds r6, r6, r2\n\t" - "mov r5, #0\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - "adds r6, r6, r2\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - "str r6, [sp, #8]\n\t" - /* A[0] * A[3] = 3 */ - "umull r2, r3, r7, r10\n\t" - "adds r4, r4, r2\n\t" - "adc r5, r5, r3\n\t" - "adds r4, r4, r2\n\t" - "mov r6, #0\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - /* A[1] * A[2] = 3 */ - "umull r2, r3, r8, r9\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - "str r4, [sp, #12]\n\t" - /* A[2] * A[2] = 4 */ - "umull r2, r3, r9, r9\n\t" - "adds r5, r5, r2\n\t" - "mov r4, #0\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - /* A[1] * A[3] = 4 */ - "umull r2, r3, r8, r10\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - /* A[0] * A[4] = 4 */ - "umull r2, r3, r7, r12\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - "str r5, [sp, #16]\n\t" - /* A[0] * A[5] = 5 */ - "ldr r11, [%[a], #20]\n\t" - "umull r2, r3, r7, r11\n\t" - "adds r6, r6, r2\n\t" - "mov r5, #0\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - "adds r6, r6, r2\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - /* A[1] * A[4] = 5 */ - "umull r2, r3, r8, r12\n\t" - "adds r6, r6, r2\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - "adds r6, r6, r2\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - /* A[2] * A[3] = 5 */ - "umull r2, r3, r9, r10\n\t" - "adds r6, r6, r2\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - "adds r6, r6, r2\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - "str r6, [sp, #20]\n\t" - /* A[3] * A[3] = 6 */ - "umull r2, r3, r10, r10\n\t" - "adds r4, r4, r2\n\t" - "mov r6, #0\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - /* A[2] * A[4] = 6 */ - "umull r2, r3, r9, r12\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - /* A[1] * A[5] = 6 */ - "umull r2, r3, r8, r11\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - /* A[0] * A[6] = 6 */ - "ldr r11, [%[a], #24]\n\t" - "umull r2, r3, r7, r11\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - "str r4, [sp, #24]\n\t" - /* A[0] * A[7] = 7 */ - "ldr r11, [%[a], #28]\n\t" - "umull r2, r3, r7, r11\n\t" - "adds r5, r5, r2\n\t" - "mov r4, #0\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - /* A[1] * A[6] = 7 */ - "ldr r11, [%[a], #24]\n\t" - "umull r2, r3, r8, r11\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - /* A[2] * A[5] = 7 */ - "ldr r11, [%[a], #20]\n\t" - "umull r2, r3, r9, r11\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - /* A[3] * A[4] = 7 */ - "umull r2, r3, r10, r12\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - "str r5, [sp, #28]\n\t" - /* A[4] * A[4] = 8 */ - "umull r2, r3, r12, r12\n\t" - "adds r6, r6, r2\n\t" - "mov r5, #0\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - /* A[3] * A[5] = 8 */ - "umull r2, r3, r10, r11\n\t" - "adds r6, r6, r2\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - "adds r6, r6, r2\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - /* A[2] * A[6] = 8 */ - "ldr r11, [%[a], #24]\n\t" - "umull r2, r3, r9, r11\n\t" - "adds r6, r6, r2\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - "adds r6, r6, r2\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - /* A[1] * A[7] = 8 */ - "ldr r11, [%[a], #28]\n\t" - "umull r2, r3, r8, r11\n\t" - "adds r6, r6, r2\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - "adds r6, r6, r2\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - "str r6, [sp, #32]\n\t" - "ldr r7, [%[a], #20]\n\t" - /* A[2] * A[7] = 9 */ - "umull r2, r3, r9, r11\n\t" - "adds r4, r4, r2\n\t" - "mov r6, #0\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - /* A[3] * A[6] = 9 */ - "ldr r11, [%[a], #24]\n\t" - "umull r2, r3, r10, r11\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - /* A[4] * A[5] = 9 */ - "umull r2, r3, r12, r7\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - "str r4, [sp, #36]\n\t" - "mov r8, r11\n\t" - /* A[5] * A[5] = 10 */ - "umull r2, r3, r7, r7\n\t" - "adds r5, r5, r2\n\t" - "mov r4, #0\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - /* A[4] * A[6] = 10 */ - "umull r2, r3, r12, r8\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - /* A[3] * A[7] = 10 */ - "ldr r11, [%[a], #28]\n\t" - "umull r2, r3, r10, r11\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - "str r5, [sp, #40]\n\t" - "mov r9, r11\n\t" - /* A[4] * A[7] = 11 */ - "umull r2, r3, r12, r9\n\t" - "adds r6, r6, r2\n\t" - "mov r5, #0\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - "adds r6, r6, r2\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - /* A[5] * A[6] = 11 */ - "umull r2, r3, r7, r8\n\t" - "adds r6, r6, r2\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - "adds r6, r6, r2\n\t" - "adcs r4, r4, r3\n\t" - "adc r5, r5, #0\n\t" - "str r6, [sp, #44]\n\t" - /* A[6] * A[6] = 12 */ - "umull r2, r3, r8, r8\n\t" - "adds r4, r4, r2\n\t" - "mov r6, #0\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - /* A[5] * A[7] = 12 */ - "umull r2, r3, r7, r9\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, r3\n\t" - "adc r6, r6, #0\n\t" - "str r4, [sp, #48]\n\t" - /* A[6] * A[7] = 13 */ - "umull r2, r3, r8, r9\n\t" - "adds r5, r5, r2\n\t" - "mov r4, #0\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - "adds r5, r5, r2\n\t" - "adcs r6, r6, r3\n\t" - "adc r4, r4, #0\n\t" - "str r5, [sp, #52]\n\t" - /* A[7] * A[7] = 14 */ - "umull r2, r3, r9, r9\n\t" - "adds r6, r6, r2\n\t" - "adc r4, r4, r3\n\t" - "str r6, [sp, #56]\n\t" - "str r4, [sp, #60]\n\t" - /* Double and Reduce */ - /* Load bottom half */ - "ldrd r4, r5, [sp]\n\t" - "ldrd r6, r7, [sp, #8]\n\t" - "ldrd r8, r9, [sp, #16]\n\t" - "ldrd r10, r11, [sp, #24]\n\t" - "lsr r2, r11, #30\n\t" - "lsl r11, r11, #1\n\t" - "orr r11, r11, r10, lsr #31\n\t" - "lsl r10, r10, #1\n\t" - "orr r10, r10, r9, lsr #31\n\t" - "lsl r9, r9, #1\n\t" - "orr r9, r9, r8, lsr #31\n\t" - "lsl r8, r8, #1\n\t" - "orr r8, r8, r7, lsr #31\n\t" - "lsl r7, r7, #1\n\t" - "orr r7, r7, r6, lsr #31\n\t" - "lsl r6, r6, #1\n\t" - "orr r6, r6, r5, lsr #31\n\t" - "lsl r5, r5, #1\n\t" - "orr r5, r5, r4, lsr #31\n\t" - "lsl r4, r4, #1\n\t" - "and r11, r11, #0x7fffffff\n\t" - "mov r12, #19\n\t" - "ldr %[a], [sp, #32]\n\t" - "orr r2, r2, %[a], lsl #2\n\t" - "umull r2, r3, r12, r2\n\t" - "adds r4, r4, r2\n\t" - "mov lr, #0\n\t" - "adcs r5, r5, r3\n\t" - "adc lr, lr, #0\n\t" - "lsr r2, %[a], #30\n\t" - "ldr %[a], [sp, #36]\n\t" - "orr r2, r2, %[a], lsl #2\n\t" - "umull r2, r3, r12, r2\n\t" - "add r3, r3, lr\n\t" - "adds r5, r5, r2\n\t" - "mov lr, #0\n\t" - "adcs r6, r6, r3\n\t" - "adc lr, lr, #0\n\t" - "lsr r2, %[a], #30\n\t" - "ldr %[a], [sp, #40]\n\t" - "orr r2, r2, %[a], lsl #2\n\t" - "umull r2, r3, r12, r2\n\t" - "add r3, r3, lr\n\t" - "adds r6, r6, r2\n\t" - "mov lr, #0\n\t" - "adcs r7, r7, r3\n\t" - "adc lr, lr, #0\n\t" - "lsr r2, %[a], #30\n\t" - "ldr %[a], [sp, #44]\n\t" - "orr r2, r2, %[a], lsl #2\n\t" - "umull r2, r3, r12, r2\n\t" - "add r3, r3, lr\n\t" - "adds r7, r7, r2\n\t" - "mov lr, #0\n\t" - "adcs r8, r8, r3\n\t" - "adc lr, lr, #0\n\t" - "lsr r2, %[a], #30\n\t" - "ldr %[a], [sp, #48]\n\t" - "orr r2, r2, %[a], lsl #2\n\t" - "umull r2, r3, r12, r2\n\t" - "add r3, r3, lr\n\t" - "adds r8, r8, r2\n\t" - "mov lr, #0\n\t" - "adcs r9, r9, r3\n\t" - "adc lr, lr, #0\n\t" - "lsr r2, %[a], #30\n\t" - "ldr %[a], [sp, #52]\n\t" - "orr r2, r2, %[a], lsl #2\n\t" - "umull r2, r3, r12, r2\n\t" - "add r3, r3, lr\n\t" - "adds r9, r9, r2\n\t" - "mov lr, #0\n\t" - "adcs r10, r10, r3\n\t" - "adc lr, lr, #0\n\t" - "lsr r2, %[a], #30\n\t" - "ldr %[a], [sp, #56]\n\t" - "orr r2, r2, %[a], lsl #2\n\t" - "umull r2, r3, r12, r2\n\t" - "add r3, r3, lr\n\t" - "adds r10, r10, r2\n\t" - "mov lr, #0\n\t" - "adcs r11, r11, r3\n\t" - "adc lr, lr, #0\n\t" - "lsr r2, %[a], #30\n\t" - "ldr %[a], [sp, #60]\n\t" - "orr r2, r2, %[a], lsl #2\n\t" - "umull r2, r3, r12, r2\n\t" - "adds r11, r11, r2\n\t" - "adc r2, r3, lr\n\t" - /* Overflow */ - "lsl r2, r2, #1\n\t" - "orr r2, r2, r11, lsr #31\n\t" - "mul r2, r2, r12\n\t" - "and r11, r11, #0x7fffffff\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, #0\n\t" - "adcs r6, r6, #0\n\t" - "adcs r7, r7, #0\n\t" - "adcs r8, r8, #0\n\t" - "adcs r9, r9, #0\n\t" - "adcs r10, r10, #0\n\t" - "adc r11, r11, #0\n\t" - /* Reduce if top bit set */ - "asr r2, r11, #31\n\t" - "and r2, r2, r12\n\t" - "and r11, r11, #0x7fffffff\n\t" - "adds r4, r4, r2\n\t" - "adcs r5, r5, #0\n\t" - "adcs r6, r6, #0\n\t" - "adcs r7, r7, #0\n\t" - "adcs r8, r8, #0\n\t" - "adcs r9, r9, #0\n\t" - "adcs r10, r10, #0\n\t" - "adc r11, r11, #0\n\t" - /* Store */ - "strd r4, r5, [%[r]]\n\t" - "strd r6, r7, [%[r], #8]\n\t" - "strd r8, r9, [%[r], #16]\n\t" - "strd r10, r11, [%[r], #24]\n\t" - "add sp, sp, #0x40\n\t" - : [r] "+r" (r), [a] "+r" (a) - : - : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" - ); -} - -void fe_invert(fe r, const fe a) -{ - __asm__ __volatile__ ( - "sub sp, sp, #0x88\n\t" - /* Invert */ - "str %[r], [sp, #128]\n\t" - "str %[a], [sp, #132]\n\t" - "mov r0, sp\n\t" - "ldr r1, [sp, #132]\n\t" - "bl fe_sq\n\t" - "add r0, sp, #32\n\t" - "mov r1, sp\n\t" - "bl fe_sq\n\t" - "add r0, sp, #32\n\t" - "add r1, sp, #32\n\t" - "bl fe_sq\n\t" - "add r0, sp, #32\n\t" - "ldr r1, [sp, #132]\n\t" - "add r2, sp, #32\n\t" - "bl fe_mul\n\t" - "mov r0, sp\n\t" - "mov r1, sp\n\t" - "add r2, sp, #32\n\t" - "bl fe_mul\n\t" - "add r0, sp, #0x40\n\t" - "mov r1, sp\n\t" - "bl fe_sq\n\t" - "add r0, sp, #32\n\t" - "add r1, sp, #32\n\t" - "add r2, sp, #0x40\n\t" - "bl fe_mul\n\t" - "add r0, sp, #0x40\n\t" - "add r1, sp, #32\n\t" - "bl fe_sq\n\t" - "mov r4, #4\n\t" - "\n" - "L_fe_invert1_%=: \n\t" - "add r0, sp, #0x40\n\t" - "add r1, sp, #0x40\n\t" - "bl fe_sq\n\t" - "sub r4, r4, #1\n\t" - "cmp r4, #0\n\t" - "bne L_fe_invert1_%=\n\t" - "add r0, sp, #32\n\t" - "add r1, sp, #0x40\n\t" - "add r2, sp, #32\n\t" - "bl fe_mul\n\t" - "add r0, sp, #0x40\n\t" - "add r1, sp, #32\n\t" - "bl fe_sq\n\t" - "mov r4, #9\n\t" - "\n" - "L_fe_invert2_%=: \n\t" - "add r0, sp, #0x40\n\t" - "add r1, sp, #0x40\n\t" - "bl fe_sq\n\t" - "sub r4, r4, #1\n\t" - "cmp r4, #0\n\t" - "bne L_fe_invert2_%=\n\t" - "add r0, sp, #0x40\n\t" - "add r1, sp, #0x40\n\t" - "add r2, sp, #32\n\t" - "bl fe_mul\n\t" - "add r0, sp, #0x60\n\t" - "add r1, sp, #0x40\n\t" - "bl fe_sq\n\t" - "mov r4, #19\n\t" - "\n" - "L_fe_invert3_%=: \n\t" - "add r0, sp, #0x60\n\t" - "add r1, sp, #0x60\n\t" - "bl fe_sq\n\t" - "sub r4, r4, #1\n\t" - "cmp r4, #0\n\t" - "bne L_fe_invert3_%=\n\t" - "add r0, sp, #0x40\n\t" - "add r1, sp, #0x60\n\t" - "add r2, sp, #0x40\n\t" - "bl fe_mul\n\t" - "mov r4, #10\n\t" - "\n" - "L_fe_invert4_%=: \n\t" - "add r0, sp, #0x40\n\t" - "add r1, sp, #0x40\n\t" - "bl fe_sq\n\t" - "sub r4, r4, #1\n\t" - "cmp r4, #0\n\t" - "bne L_fe_invert4_%=\n\t" - "add r0, sp, #32\n\t" - "add r1, sp, #0x40\n\t" - "add r2, sp, #32\n\t" - "bl fe_mul\n\t" - "add r0, sp, #0x40\n\t" - "add r1, sp, #32\n\t" - "bl fe_sq\n\t" - "mov r4, #49\n\t" - "\n" - "L_fe_invert5_%=: \n\t" - "add r0, sp, #0x40\n\t" - "add r1, sp, #0x40\n\t" - "bl fe_sq\n\t" - "sub r4, r4, #1\n\t" - "cmp r4, #0\n\t" - "bne L_fe_invert5_%=\n\t" - "add r0, sp, #0x40\n\t" - "add r1, sp, #0x40\n\t" - "add r2, sp, #32\n\t" - "bl fe_mul\n\t" - "add r0, sp, #0x60\n\t" - "add r1, sp, #0x40\n\t" - "bl fe_sq\n\t" - "mov r4, #0x63\n\t" - "\n" - "L_fe_invert6_%=: \n\t" - "add r0, sp, #0x60\n\t" - "add r1, sp, #0x60\n\t" - "bl fe_sq\n\t" - "sub r4, r4, #1\n\t" - "cmp r4, #0\n\t" - "bne L_fe_invert6_%=\n\t" - "add r0, sp, #0x40\n\t" - "add r1, sp, #0x60\n\t" - "add r2, sp, #0x40\n\t" - "bl fe_mul\n\t" - "mov r4, #50\n\t" - "\n" - "L_fe_invert7_%=: \n\t" - "add r0, sp, #0x40\n\t" - "add r1, sp, #0x40\n\t" - "bl fe_sq\n\t" - "sub r4, r4, #1\n\t" - "cmp r4, #0\n\t" - "bne L_fe_invert7_%=\n\t" - "add r0, sp, #32\n\t" - "add r1, sp, #0x40\n\t" - "add r2, sp, #32\n\t" - "bl fe_mul\n\t" - "mov r4, #5\n\t" - "\n" - "L_fe_invert8_%=: \n\t" - "add r0, sp, #32\n\t" - "add r1, sp, #32\n\t" - "bl fe_sq\n\t" - "sub r4, r4, #1\n\t" - "cmp r4, #0\n\t" - "bne L_fe_invert8_%=\n\t" - "ldr r0, [sp, #128]\n\t" - "add r1, sp, #32\n\t" - "mov r2, sp\n\t" - "bl fe_mul\n\t" - "ldr %[a], [sp, #132]\n\t" - "ldr %[r], [sp, #128]\n\t" - "add sp, sp, #0x88\n\t" - : [r] "+r" (r), [a] "+r" (a) - : - : "memory", "lr", "r4" - ); -} - -int curve25519(byte* r, byte* n, byte* a) -{ - __asm__ __volatile__ ( - "sub sp, sp, #0xbc\n\t" - "str %[r], [sp, #160]\n\t" - "str %[n], [sp, #164]\n\t" - "str %[a], [sp, #168]\n\t" - "mov %[n], #0\n\t" - "str %[n], [sp, #172]\n\t" - /* Set one */ - "mov r11, #1\n\t" - "mov r10, #0\n\t" - "strd r11, r10, [%[r]]\n\t" - "strd r10, r10, [%[r], #8]\n\t" - "strd r10, r10, [%[r], #16]\n\t" - "strd r10, r10, [%[r], #24]\n\t" - /* Set zero */ - "mov r10, #0\n\t" - "strd r10, r10, [sp]\n\t" - "strd r10, r10, [sp, #8]\n\t" - "strd r10, r10, [sp, #16]\n\t" - "strd r10, r10, [sp, #24]\n\t" - /* Set one */ - "mov r11, #1\n\t" - "mov r10, #0\n\t" - "strd r11, r10, [sp, #32]\n\t" - "strd r10, r10, [sp, #40]\n\t" - "strd r10, r10, [sp, #48]\n\t" - "strd r10, r10, [sp, #56]\n\t" - /* Copy */ - "ldrd r4, r5, [%[a]]\n\t" - "ldrd r6, r7, [%[a], #8]\n\t" - "strd r4, r5, [sp, #64]\n\t" - "strd r6, r7, [sp, #72]\n\t" - "ldrd r4, r5, [%[a], #16]\n\t" - "ldrd r6, r7, [%[a], #24]\n\t" - "strd r4, r5, [sp, #80]\n\t" - "strd r6, r7, [sp, #88]\n\t" - "mov %[n], #30\n\t" - "str %[n], [sp, #180]\n\t" - "mov %[a], #28\n\t" - "str %[a], [sp, #176]\n\t" - "\n" - "L_curve25519_words_%=: \n\t" - "\n" - "L_curve25519_bits_%=: \n\t" - "ldr %[n], [sp, #164]\n\t" - "ldr %[a], [%[n], r2]\n\t" - "ldr %[n], [sp, #180]\n\t" - "lsr %[a], %[a], %[n]\n\t" - "and %[a], %[a], #1\n\t" - "str %[a], [sp, #184]\n\t" - "ldr %[n], [sp, #172]\n\t" - "eor %[n], %[n], %[a]\n\t" - "str %[n], [sp, #172]\n\t" - "ldr %[r], [sp, #160]\n\t" - /* Conditional Swap */ - "neg %[n], %[n]\n\t" - "ldrd r4, r5, [%[r]]\n\t" - "ldrd r6, r7, [sp, #64]\n\t" - "eor r8, r4, r6\n\t" - "eor r9, r5, r7\n\t" - "and r8, r8, %[n]\n\t" - "and r9, r9, %[n]\n\t" - "eor r4, r4, r8\n\t" - "eor r5, r5, r9\n\t" - "eor r6, r6, r8\n\t" - "eor r7, r7, r9\n\t" - "strd r4, r5, [%[r]]\n\t" - "strd r6, r7, [sp, #64]\n\t" - "ldrd r4, r5, [%[r], #8]\n\t" - "ldrd r6, r7, [sp, #72]\n\t" - "eor r8, r4, r6\n\t" - "eor r9, r5, r7\n\t" - "and r8, r8, %[n]\n\t" - "and r9, r9, %[n]\n\t" - "eor r4, r4, r8\n\t" - "eor r5, r5, r9\n\t" - "eor r6, r6, r8\n\t" - "eor r7, r7, r9\n\t" - "strd r4, r5, [%[r], #8]\n\t" - "strd r6, r7, [sp, #72]\n\t" - "ldrd r4, r5, [%[r], #16]\n\t" - "ldrd r6, r7, [sp, #80]\n\t" - "eor r8, r4, r6\n\t" - "eor r9, r5, r7\n\t" - "and r8, r8, %[n]\n\t" - "and r9, r9, %[n]\n\t" - "eor r4, r4, r8\n\t" - "eor r5, r5, r9\n\t" - "eor r6, r6, r8\n\t" - "eor r7, r7, r9\n\t" - "strd r4, r5, [%[r], #16]\n\t" - "strd r6, r7, [sp, #80]\n\t" - "ldrd r4, r5, [%[r], #24]\n\t" - "ldrd r6, r7, [sp, #88]\n\t" - "eor r8, r4, r6\n\t" - "eor r9, r5, r7\n\t" - "and r8, r8, %[n]\n\t" - "and r9, r9, %[n]\n\t" - "eor r4, r4, r8\n\t" - "eor r5, r5, r9\n\t" - "eor r6, r6, r8\n\t" - "eor r7, r7, r9\n\t" - "strd r4, r5, [%[r], #24]\n\t" - "strd r6, r7, [sp, #88]\n\t" - "ldr %[n], [sp, #172]\n\t" - /* Conditional Swap */ - "neg %[n], %[n]\n\t" - "ldrd r4, r5, [sp]\n\t" - "ldrd r6, r7, [sp, #32]\n\t" - "eor r8, r4, r6\n\t" - "eor r9, r5, r7\n\t" - "and r8, r8, %[n]\n\t" - "and r9, r9, %[n]\n\t" - "eor r4, r4, r8\n\t" - "eor r5, r5, r9\n\t" - "eor r6, r6, r8\n\t" - "eor r7, r7, r9\n\t" - "strd r4, r5, [sp]\n\t" - "strd r6, r7, [sp, #32]\n\t" - "ldrd r4, r5, [sp, #8]\n\t" - "ldrd r6, r7, [sp, #40]\n\t" - "eor r8, r4, r6\n\t" - "eor r9, r5, r7\n\t" - "and r8, r8, %[n]\n\t" - "and r9, r9, %[n]\n\t" - "eor r4, r4, r8\n\t" - "eor r5, r5, r9\n\t" - "eor r6, r6, r8\n\t" - "eor r7, r7, r9\n\t" - "strd r4, r5, [sp, #8]\n\t" - "strd r6, r7, [sp, #40]\n\t" - "ldrd r4, r5, [sp, #16]\n\t" - "ldrd r6, r7, [sp, #48]\n\t" - "eor r8, r4, r6\n\t" - "eor r9, r5, r7\n\t" - "and r8, r8, %[n]\n\t" - "and r9, r9, %[n]\n\t" - "eor r4, r4, r8\n\t" - "eor r5, r5, r9\n\t" - "eor r6, r6, r8\n\t" - "eor r7, r7, r9\n\t" - "strd r4, r5, [sp, #16]\n\t" - "strd r6, r7, [sp, #48]\n\t" - "ldrd r4, r5, [sp, #24]\n\t" - "ldrd r6, r7, [sp, #56]\n\t" - "eor r8, r4, r6\n\t" - "eor r9, r5, r7\n\t" - "and r8, r8, %[n]\n\t" - "and r9, r9, %[n]\n\t" - "eor r4, r4, r8\n\t" - "eor r5, r5, r9\n\t" - "eor r6, r6, r8\n\t" - "eor r7, r7, r9\n\t" - "strd r4, r5, [sp, #24]\n\t" - "strd r6, r7, [sp, #56]\n\t" - "ldr %[n], [sp, #184]\n\t" - "str %[n], [sp, #172]\n\t" - /* Add-Sub */ - /* Add */ - "ldrd r4, r5, [%[r]]\n\t" - "ldrd r6, r7, [sp]\n\t" - "adds r8, r4, r6\n\t" - "mov r3, #0\n\t" - "adcs r9, r5, r7\n\t" - "adc r3, r3, #0\n\t" - "strd r8, r9, [%[r]]\n\t" - /* Sub */ - "subs r10, r4, r6\n\t" - "mov r12, #0\n\t" - "sbcs r11, r5, r7\n\t" - "adc r12, r12, #0\n\t" - "strd r10, r11, [sp, #128]\n\t" - /* Add */ - "ldrd r4, r5, [%[r], #8]\n\t" - "ldrd r6, r7, [sp, #8]\n\t" - "adds r3, r3, #-1\n\t" - "adcs r8, r4, r6\n\t" - "mov r3, #0\n\t" - "adcs r9, r5, r7\n\t" - "adc r3, r3, #0\n\t" - "strd r8, r9, [%[r], #8]\n\t" - /* Sub */ - "adds r12, r12, #-1\n\t" - "sbcs r10, r4, r6\n\t" - "mov r12, #0\n\t" - "sbcs r11, r5, r7\n\t" - "adc r12, r12, #0\n\t" - "strd r10, r11, [sp, #136]\n\t" - /* Add */ - "ldrd r4, r5, [%[r], #16]\n\t" - "ldrd r6, r7, [sp, #16]\n\t" - "adds r3, r3, #-1\n\t" - "adcs r8, r4, r6\n\t" - "mov r3, #0\n\t" - "adcs r9, r5, r7\n\t" - "adc r3, r3, #0\n\t" - "strd r8, r9, [%[r], #16]\n\t" - /* Sub */ - "adds r12, r12, #-1\n\t" - "sbcs r10, r4, r6\n\t" - "mov r12, #0\n\t" - "sbcs r11, r5, r7\n\t" - "adc r12, r12, #0\n\t" - "strd r10, r11, [sp, #144]\n\t" - /* Add */ - "ldrd r4, r5, [%[r], #24]\n\t" - "ldrd r6, r7, [sp, #24]\n\t" - "adds r3, r3, #-1\n\t" - "adcs r8, r4, r6\n\t" - "adc r9, r5, r7\n\t" - /* Sub */ - "adds r12, r12, #-1\n\t" - "sbcs r10, r4, r6\n\t" - "sbc r11, r5, r7\n\t" - "mov r3, #-19\n\t" - "asr %[a], r9, #31\n\t" - /* Mask the modulus */ - "and r3, %[a], r3\n\t" - "and r12, %[a], #0x7fffffff\n\t" - /* Sub modulus (if overflow) */ - "ldrd r4, r5, [%[r]]\n\t" - "subs r4, r4, r3\n\t" - "sbcs r5, r5, %[a]\n\t" - "strd r4, r5, [%[r]]\n\t" - "ldrd r4, r5, [%[r], #8]\n\t" - "sbcs r4, r4, %[a]\n\t" - "sbcs r5, r5, %[a]\n\t" - "strd r4, r5, [%[r], #8]\n\t" - "ldrd r4, r5, [%[r], #16]\n\t" - "sbcs r4, r4, %[a]\n\t" - "sbcs r5, r5, %[a]\n\t" - "strd r4, r5, [%[r], #16]\n\t" - "sbcs r8, r8, %[a]\n\t" - "sbc r9, r9, r12\n\t" - "strd r8, r9, [%[r], #24]\n\t" - "mov r3, #-19\n\t" - "asr %[a], r11, #31\n\t" - /* Mask the modulus */ - "and r3, %[a], r3\n\t" - "and r12, %[a], #0x7fffffff\n\t" - /* Add modulus (if underflow) */ - "ldrd r4, r5, [sp, #128]\n\t" - "adds r4, r4, r3\n\t" - "adcs r5, r5, %[a]\n\t" - "strd r4, r5, [sp, #128]\n\t" - "ldrd r4, r5, [sp, #136]\n\t" - "adcs r4, r4, %[a]\n\t" - "adcs r5, r5, %[a]\n\t" - "strd r4, r5, [sp, #136]\n\t" - "ldrd r4, r5, [sp, #144]\n\t" - "adcs r4, r4, %[a]\n\t" - "adcs r5, r5, %[a]\n\t" - "strd r4, r5, [sp, #144]\n\t" - "adcs r10, r10, %[a]\n\t" - "adc r11, r11, r12\n\t" - "strd r10, r11, [sp, #152]\n\t" - /* Add-Sub */ - /* Add */ - "ldrd r4, r5, [sp, #64]\n\t" - "ldrd r6, r7, [sp, #32]\n\t" - "adds r8, r4, r6\n\t" - "mov r3, #0\n\t" - "adcs r9, r5, r7\n\t" - "adc r3, r3, #0\n\t" - "strd r8, r9, [sp]\n\t" - /* Sub */ - "subs r10, r4, r6\n\t" - "mov r12, #0\n\t" - "sbcs r11, r5, r7\n\t" - "adc r12, r12, #0\n\t" - "strd r10, r11, [sp, #96]\n\t" - /* Add */ - "ldrd r4, r5, [sp, #72]\n\t" - "ldrd r6, r7, [sp, #40]\n\t" - "adds r3, r3, #-1\n\t" - "adcs r8, r4, r6\n\t" - "mov r3, #0\n\t" - "adcs r9, r5, r7\n\t" - "adc r3, r3, #0\n\t" - "strd r8, r9, [sp, #8]\n\t" - /* Sub */ - "adds r12, r12, #-1\n\t" - "sbcs r10, r4, r6\n\t" - "mov r12, #0\n\t" - "sbcs r11, r5, r7\n\t" - "adc r12, r12, #0\n\t" - "strd r10, r11, [sp, #104]\n\t" - /* Add */ - "ldrd r4, r5, [sp, #80]\n\t" - "ldrd r6, r7, [sp, #48]\n\t" - "adds r3, r3, #-1\n\t" - "adcs r8, r4, r6\n\t" - "mov r3, #0\n\t" - "adcs r9, r5, r7\n\t" - "adc r3, r3, #0\n\t" - "strd r8, r9, [sp, #16]\n\t" - /* Sub */ - "adds r12, r12, #-1\n\t" - "sbcs r10, r4, r6\n\t" - "mov r12, #0\n\t" - "sbcs r11, r5, r7\n\t" - "adc r12, r12, #0\n\t" - "strd r10, r11, [sp, #112]\n\t" - /* Add */ - "ldrd r4, r5, [sp, #88]\n\t" - "ldrd r6, r7, [sp, #56]\n\t" - "adds r3, r3, #-1\n\t" - "adcs r8, r4, r6\n\t" - "adc r9, r5, r7\n\t" - /* Sub */ - "adds r12, r12, #-1\n\t" - "sbcs r10, r4, r6\n\t" - "sbc r11, r5, r7\n\t" - "mov r3, #-19\n\t" - "asr %[a], r9, #31\n\t" - /* Mask the modulus */ - "and r3, %[a], r3\n\t" - "and r12, %[a], #0x7fffffff\n\t" - /* Sub modulus (if overflow) */ - "ldrd r4, r5, [sp]\n\t" - "subs r4, r4, r3\n\t" - "sbcs r5, r5, %[a]\n\t" - "strd r4, r5, [sp]\n\t" - "ldrd r4, r5, [sp, #8]\n\t" - "sbcs r4, r4, %[a]\n\t" - "sbcs r5, r5, %[a]\n\t" - "strd r4, r5, [sp, #8]\n\t" - "ldrd r4, r5, [sp, #16]\n\t" - "sbcs r4, r4, %[a]\n\t" - "sbcs r5, r5, %[a]\n\t" - "strd r4, r5, [sp, #16]\n\t" - "sbcs r8, r8, %[a]\n\t" - "sbc r9, r9, r12\n\t" - "strd r8, r9, [sp, #24]\n\t" - "mov r3, #-19\n\t" - "asr %[a], r11, #31\n\t" - /* Mask the modulus */ - "and r3, %[a], r3\n\t" - "and r12, %[a], #0x7fffffff\n\t" - /* Add modulus (if underflow) */ - "ldrd r4, r5, [sp, #96]\n\t" - "adds r4, r4, r3\n\t" - "adcs r5, r5, %[a]\n\t" - "strd r4, r5, [sp, #96]\n\t" - "ldrd r4, r5, [sp, #104]\n\t" - "adcs r4, r4, %[a]\n\t" - "adcs r5, r5, %[a]\n\t" - "strd r4, r5, [sp, #104]\n\t" - "ldrd r4, r5, [sp, #112]\n\t" - "adcs r4, r4, %[a]\n\t" - "adcs r5, r5, %[a]\n\t" - "strd r4, r5, [sp, #112]\n\t" - "adcs r10, r10, %[a]\n\t" - "adc r11, r11, r12\n\t" - "strd r10, r11, [sp, #120]\n\t" - "ldr r2, [sp, #160]\n\t" - "add r1, sp, #0x60\n\t" - "add r0, sp, #32\n\t" - "bl fe_mul\n\t" - "add r2, sp, #0x80\n\t" - "add r1, sp, #0\n\t" - "add r0, sp, #0\n\t" - "bl fe_mul\n\t" - "add r1, sp, #0x80\n\t" - "add r0, sp, #0x60\n\t" - "bl fe_sq\n\t" - "ldr r1, [sp, #160]\n\t" - "add r0, sp, #0x80\n\t" - "bl fe_sq\n\t" - /* Add-Sub */ - /* Add */ - "ldrd r4, r5, [sp, #32]\n\t" - "ldrd r6, r7, [sp]\n\t" - "adds r8, r4, r6\n\t" - "mov r3, #0\n\t" - "adcs r9, r5, r7\n\t" - "adc r3, r3, #0\n\t" - "strd r8, r9, [sp, #64]\n\t" - /* Sub */ - "subs r10, r4, r6\n\t" - "mov r12, #0\n\t" - "sbcs r11, r5, r7\n\t" - "adc r12, r12, #0\n\t" - "strd r10, r11, [sp]\n\t" - /* Add */ - "ldrd r4, r5, [sp, #40]\n\t" - "ldrd r6, r7, [sp, #8]\n\t" - "adds r3, r3, #-1\n\t" - "adcs r8, r4, r6\n\t" - "mov r3, #0\n\t" - "adcs r9, r5, r7\n\t" - "adc r3, r3, #0\n\t" - "strd r8, r9, [sp, #72]\n\t" - /* Sub */ - "adds r12, r12, #-1\n\t" - "sbcs r10, r4, r6\n\t" - "mov r12, #0\n\t" - "sbcs r11, r5, r7\n\t" - "adc r12, r12, #0\n\t" - "strd r10, r11, [sp, #8]\n\t" - /* Add */ - "ldrd r4, r5, [sp, #48]\n\t" - "ldrd r6, r7, [sp, #16]\n\t" - "adds r3, r3, #-1\n\t" - "adcs r8, r4, r6\n\t" - "mov r3, #0\n\t" - "adcs r9, r5, r7\n\t" - "adc r3, r3, #0\n\t" - "strd r8, r9, [sp, #80]\n\t" - /* Sub */ - "adds r12, r12, #-1\n\t" - "sbcs r10, r4, r6\n\t" - "mov r12, #0\n\t" - "sbcs r11, r5, r7\n\t" - "adc r12, r12, #0\n\t" - "strd r10, r11, [sp, #16]\n\t" - /* Add */ - "ldrd r4, r5, [sp, #56]\n\t" - "ldrd r6, r7, [sp, #24]\n\t" - "adds r3, r3, #-1\n\t" - "adcs r8, r4, r6\n\t" - "adc r9, r5, r7\n\t" - /* Sub */ - "adds r12, r12, #-1\n\t" - "sbcs r10, r4, r6\n\t" - "sbc r11, r5, r7\n\t" - "mov r3, #-19\n\t" - "asr %[a], r9, #31\n\t" - /* Mask the modulus */ - "and r3, %[a], r3\n\t" - "and r12, %[a], #0x7fffffff\n\t" - /* Sub modulus (if overflow) */ - "ldrd r4, r5, [sp, #64]\n\t" - "subs r4, r4, r3\n\t" - "sbcs r5, r5, %[a]\n\t" - "strd r4, r5, [sp, #64]\n\t" - "ldrd r4, r5, [sp, #72]\n\t" - "sbcs r4, r4, %[a]\n\t" - "sbcs r5, r5, %[a]\n\t" - "strd r4, r5, [sp, #72]\n\t" - "ldrd r4, r5, [sp, #80]\n\t" - "sbcs r4, r4, %[a]\n\t" - "sbcs r5, r5, %[a]\n\t" - "strd r4, r5, [sp, #80]\n\t" - "sbcs r8, r8, %[a]\n\t" - "sbc r9, r9, r12\n\t" - "strd r8, r9, [sp, #88]\n\t" - "mov r3, #-19\n\t" - "asr %[a], r11, #31\n\t" - /* Mask the modulus */ - "and r3, %[a], r3\n\t" - "and r12, %[a], #0x7fffffff\n\t" - /* Add modulus (if underflow) */ - "ldrd r4, r5, [sp]\n\t" - "adds r4, r4, r3\n\t" - "adcs r5, r5, %[a]\n\t" - "strd r4, r5, [sp]\n\t" - "ldrd r4, r5, [sp, #8]\n\t" - "adcs r4, r4, %[a]\n\t" - "adcs r5, r5, %[a]\n\t" - "strd r4, r5, [sp, #8]\n\t" - "ldrd r4, r5, [sp, #16]\n\t" - "adcs r4, r4, %[a]\n\t" - "adcs r5, r5, %[a]\n\t" - "strd r4, r5, [sp, #16]\n\t" - "adcs r10, r10, %[a]\n\t" - "adc r11, r11, r12\n\t" - "strd r10, r11, [sp, #24]\n\t" - "add r2, sp, #0x60\n\t" - "add r1, sp, #0x80\n\t" - "ldr r0, [sp, #160]\n\t" - "bl fe_mul\n\t" - /* Sub */ - "ldrd r4, r5, [sp, #128]\n\t" - "ldrd r6, r7, [sp, #136]\n\t" - "ldrd r8, r9, [sp, #96]\n\t" - "ldrd r10, r11, [sp, #104]\n\t" - "subs r8, r4, r8\n\t" - "sbcs r9, r5, r9\n\t" - "sbcs r10, r6, r10\n\t" - "sbcs r11, r7, r11\n\t" - "strd r8, r9, [sp, #128]\n\t" - "strd r10, r11, [sp, #136]\n\t" - "ldrd r4, r5, [sp, #144]\n\t" - "ldrd r6, r7, [sp, #152]\n\t" - "ldrd r8, r9, [sp, #112]\n\t" - "ldrd r10, r11, [sp, #120]\n\t" - "sbcs r8, r4, r8\n\t" - "sbcs r9, r5, r9\n\t" - "sbcs r10, r6, r10\n\t" - "sbc r11, r7, r11\n\t" - "mov r3, #-19\n\t" - "asr %[a], r11, #31\n\t" - /* Mask the modulus */ - "and r3, %[a], r3\n\t" - "and r12, %[a], #0x7fffffff\n\t" - /* Add modulus (if underflow) */ - "ldrd r4, r5, [sp, #128]\n\t" - "ldrd r6, r7, [sp, #136]\n\t" - "adds r4, r4, r3\n\t" - "adcs r5, r5, %[a]\n\t" - "adcs r6, r6, %[a]\n\t" - "adcs r7, r7, %[a]\n\t" - "adcs r8, r8, %[a]\n\t" - "adcs r9, r9, %[a]\n\t" - "adcs r10, r10, %[a]\n\t" - "adc r11, r11, r12\n\t" - "strd r4, r5, [sp, #128]\n\t" - "strd r6, r7, [sp, #136]\n\t" - "strd r8, r9, [sp, #144]\n\t" - "strd r10, r11, [sp, #152]\n\t" - "add r1, sp, #0\n\t" - "add r0, sp, #0\n\t" - "bl fe_sq\n\t" - /* Multiply by 121666 */ - "ldrd r4, r5, [sp, #128]\n\t" - "ldrd r6, r7, [sp, #136]\n\t" - "ldrd r8, r9, [sp, #144]\n\t" - "ldrd r10, r11, [sp, #152]\n\t" - "movw r12, #0xdb42\n\t" - "movt r12, #1\n\t" - "umull r4, %[a], r4, r12\n\t" - "umull r5, r3, r5, r12\n\t" - "adds r5, r5, %[a]\n\t" - "adc %[a], r3, #0\n\t" - "umull r6, r3, r6, r12\n\t" - "adds r6, r6, %[a]\n\t" - "adc %[a], r3, #0\n\t" - "umull r7, r3, r7, r12\n\t" - "adds r7, r7, %[a]\n\t" - "adc %[a], r3, #0\n\t" - "umull r8, r3, r8, r12\n\t" - "adds r8, r8, %[a]\n\t" - "adc %[a], r3, #0\n\t" - "umull r9, r3, r9, r12\n\t" - "adds r9, r9, %[a]\n\t" - "adc %[a], r3, #0\n\t" - "umull r10, r3, r10, r12\n\t" - "adds r10, r10, %[a]\n\t" - "adc %[a], r3, #0\n\t" - "umull r11, r3, r11, r12\n\t" - "adds r11, r11, %[a]\n\t" - "adc %[a], r3, #0\n\t" - "mov r12, #19\n\t" - "lsl %[a], %[a], #1\n\t" - "orr %[a], %[a], r11, lsr #31\n\t" - "mul %[a], %[a], r12\n\t" - "and r11, r11, #0x7fffffff\n\t" - "adds r4, r4, %[a]\n\t" - "adcs r5, r5, #0\n\t" - "adcs r6, r6, #0\n\t" - "adcs r7, r7, #0\n\t" - "adcs r8, r8, #0\n\t" - "adcs r9, r9, #0\n\t" - "adcs r10, r10, #0\n\t" - "adc r11, r11, #0\n\t" - "strd r4, r5, [sp, #32]\n\t" - "strd r6, r7, [sp, #40]\n\t" - "strd r8, r9, [sp, #48]\n\t" - "strd r10, r11, [sp, #56]\n\t" - "add r1, sp, #0x40\n\t" - "add r0, sp, #0x40\n\t" - "bl fe_sq\n\t" - /* Add */ - "ldrd r4, r5, [sp, #96]\n\t" - "ldrd r6, r7, [sp, #104]\n\t" - "ldrd r8, r9, [sp, #32]\n\t" - "ldrd r10, r11, [sp, #40]\n\t" - "adds r8, r4, r8\n\t" - "adcs r9, r5, r9\n\t" - "adcs r10, r6, r10\n\t" - "adcs r11, r7, r11\n\t" - "strd r8, r9, [sp, #96]\n\t" - "strd r10, r11, [sp, #104]\n\t" - "ldrd r4, r5, [sp, #112]\n\t" - "ldrd r6, r7, [sp, #120]\n\t" - "ldrd r8, r9, [sp, #48]\n\t" - "ldrd r10, r11, [sp, #56]\n\t" - "adcs r8, r4, r8\n\t" - "adcs r9, r5, r9\n\t" - "adcs r10, r6, r10\n\t" - "adc r11, r7, r11\n\t" - "mov r3, #-19\n\t" - "asr %[a], r11, #31\n\t" - /* Mask the modulus */ - "and r3, %[a], r3\n\t" - "and r12, %[a], #0x7fffffff\n\t" - /* Sub modulus (if overflow) */ - "ldrd r4, r5, [sp, #96]\n\t" - "ldrd r6, r7, [sp, #104]\n\t" - "subs r4, r4, r3\n\t" - "sbcs r5, r5, %[a]\n\t" - "sbcs r6, r6, %[a]\n\t" - "sbcs r7, r7, %[a]\n\t" - "sbcs r8, r8, %[a]\n\t" - "sbcs r9, r9, %[a]\n\t" - "sbcs r10, r10, %[a]\n\t" - "sbc r11, r11, r12\n\t" - "strd r4, r5, [sp, #96]\n\t" - "strd r6, r7, [sp, #104]\n\t" - "strd r8, r9, [sp, #112]\n\t" - "strd r10, r11, [sp, #120]\n\t" - "add r2, sp, #0\n\t" - "ldr r1, [sp, #168]\n\t" - "add r0, sp, #32\n\t" - "bl fe_mul\n\t" - "add r2, sp, #0x60\n\t" - "add r1, sp, #0x80\n\t" - "add r0, sp, #0\n\t" - "bl fe_mul\n\t" - "ldr %[a], [sp, #176]\n\t" - "ldr %[n], [sp, #180]\n\t" - "subs %[n], %[n], #1\n\t" - "str %[n], [sp, #180]\n\t" - "bge L_curve25519_bits_%=\n\t" - "mov %[n], #31\n\t" - "str %[n], [sp, #180]\n\t" - "subs %[a], %[a], #4\n\t" - "str %[a], [sp, #176]\n\t" - "bge L_curve25519_words_%=\n\t" - /* Invert */ - "add r0, sp, #32\n\t" - "add r1, sp, #0\n\t" - "bl fe_sq\n\t" - "add r0, sp, #0x40\n\t" - "add r1, sp, #32\n\t" - "bl fe_sq\n\t" - "add r0, sp, #0x40\n\t" - "add r1, sp, #0x40\n\t" - "bl fe_sq\n\t" - "add r0, sp, #0x40\n\t" - "add r1, sp, #0\n\t" - "add r2, sp, #0x40\n\t" - "bl fe_mul\n\t" - "add r0, sp, #32\n\t" - "add r1, sp, #32\n\t" - "add r2, sp, #0x40\n\t" - "bl fe_mul\n\t" - "add r0, sp, #0x60\n\t" - "add r1, sp, #32\n\t" - "bl fe_sq\n\t" - "add r0, sp, #0x40\n\t" - "add r1, sp, #0x40\n\t" - "add r2, sp, #0x60\n\t" - "bl fe_mul\n\t" - "add r0, sp, #0x60\n\t" - "add r1, sp, #0x40\n\t" - "bl fe_sq\n\t" - "mov r4, #4\n\t" - "\n" - "L_curve25519_inv_1_%=: \n\t" - "add r0, sp, #0x60\n\t" - "add r1, sp, #0x60\n\t" - "bl fe_sq\n\t" - "sub r4, r4, #1\n\t" - "cmp r4, #0\n\t" - "bne L_curve25519_inv_1_%=\n\t" - "add r0, sp, #0x40\n\t" - "add r1, sp, #0x60\n\t" - "add r2, sp, #0x40\n\t" - "bl fe_mul\n\t" - "add r0, sp, #0x60\n\t" - "add r1, sp, #0x40\n\t" - "bl fe_sq\n\t" - "mov r4, #9\n\t" - "\n" - "L_curve25519_inv_2_%=: \n\t" - "add r0, sp, #0x60\n\t" - "add r1, sp, #0x60\n\t" - "bl fe_sq\n\t" - "sub r4, r4, #1\n\t" - "cmp r4, #0\n\t" - "bne L_curve25519_inv_2_%=\n\t" - "add r0, sp, #0x60\n\t" - "add r1, sp, #0x60\n\t" - "add r2, sp, #0x40\n\t" - "bl fe_mul\n\t" - "add r0, sp, #0x80\n\t" - "add r1, sp, #0x60\n\t" - "bl fe_sq\n\t" - "mov r4, #19\n\t" - "\n" - "L_curve25519_inv_3_%=: \n\t" - "add r0, sp, #0x80\n\t" - "add r1, sp, #0x80\n\t" - "bl fe_sq\n\t" - "sub r4, r4, #1\n\t" - "cmp r4, #0\n\t" - "bne L_curve25519_inv_3_%=\n\t" - "add r0, sp, #0x60\n\t" - "add r1, sp, #0x80\n\t" - "add r2, sp, #0x60\n\t" - "bl fe_mul\n\t" - "mov r4, #10\n\t" - "\n" - "L_curve25519_inv_4_%=: \n\t" - "add r0, sp, #0x60\n\t" - "add r1, sp, #0x60\n\t" - "bl fe_sq\n\t" - "sub r4, r4, #1\n\t" - "cmp r4, #0\n\t" - "bne L_curve25519_inv_4_%=\n\t" - "add r0, sp, #0x40\n\t" - "add r1, sp, #0x60\n\t" - "add r2, sp, #0x40\n\t" - "bl fe_mul\n\t" - "add r0, sp, #0x60\n\t" - "add r1, sp, #0x40\n\t" - "bl fe_sq\n\t" - "mov r4, #49\n\t" - "\n" - "L_curve25519_inv_5_%=: \n\t" - "add r0, sp, #0x60\n\t" - "add r1, sp, #0x60\n\t" - "bl fe_sq\n\t" - "sub r4, r4, #1\n\t" - "cmp r4, #0\n\t" - "bne L_curve25519_inv_5_%=\n\t" - "add r0, sp, #0x60\n\t" - "add r1, sp, #0x60\n\t" - "add r2, sp, #0x40\n\t" - "bl fe_mul\n\t" - "add r0, sp, #0x80\n\t" - "add r1, sp, #0x60\n\t" - "bl fe_sq\n\t" - "mov r4, #0x63\n\t" - "\n" - "L_curve25519_inv_6_%=: \n\t" - "add r0, sp, #0x80\n\t" - "add r1, sp, #0x80\n\t" - "bl fe_sq\n\t" - "sub r4, r4, #1\n\t" - "cmp r4, #0\n\t" - "bne L_curve25519_inv_6_%=\n\t" - "add r0, sp, #0x60\n\t" - "add r1, sp, #0x80\n\t" - "add r2, sp, #0x60\n\t" - "bl fe_mul\n\t" - "mov r4, #50\n\t" - "\n" - "L_curve25519_inv_7_%=: \n\t" - "add r0, sp, #0x60\n\t" - "add r1, sp, #0x60\n\t" - "bl fe_sq\n\t" - "sub r4, r4, #1\n\t" - "cmp r4, #0\n\t" - "bne L_curve25519_inv_7_%=\n\t" - "add r0, sp, #0x40\n\t" - "add r1, sp, #0x60\n\t" - "add r2, sp, #0x40\n\t" - "bl fe_mul\n\t" - "mov r4, #5\n\t" - "\n" - "L_curve25519_inv_8_%=: \n\t" - "add r0, sp, #0x40\n\t" - "add r1, sp, #0x40\n\t" - "bl fe_sq\n\t" - "sub r4, r4, #1\n\t" - "cmp r4, #0\n\t" - "bne L_curve25519_inv_8_%=\n\t" - "add r0, sp, #0\n\t" - "add r1, sp, #0x40\n\t" - "add r2, sp, #32\n\t" - "bl fe_mul\n\t" - "add r2, sp, #0\n\t" - "ldr r1, [sp, #160]\n\t" - "ldr r0, [sp, #160]\n\t" - "bl fe_mul\n\t" - "mov r0, #0\n\t" - "add sp, sp, #0xbc\n\t" - : [r] "+r" (r), [n] "+r" (n), [a] "+r" (a) - : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" - ); - return (uint32_t)(size_t)r; -} - -void fe_pow22523(fe r, const fe a) -{ - __asm__ __volatile__ ( - "sub sp, sp, #0x68\n\t" - /* pow22523 */ - "str %[r], [sp, #96]\n\t" - "str %[a], [sp, #100]\n\t" - "mov r0, sp\n\t" - "ldr r1, [sp, #100]\n\t" - "bl fe_sq\n\t" - "add r0, sp, #32\n\t" - "mov r1, sp\n\t" - "bl fe_sq\n\t" - "add r0, sp, #32\n\t" - "add r1, sp, #32\n\t" - "bl fe_sq\n\t" - "add r0, sp, #32\n\t" - "ldr r1, [sp, #100]\n\t" - "add r2, sp, #32\n\t" - "bl fe_mul\n\t" - "mov r0, sp\n\t" - "mov r1, sp\n\t" - "add r2, sp, #32\n\t" - "bl fe_mul\n\t" - "mov r0, sp\n\t" - "mov r1, sp\n\t" - "bl fe_sq\n\t" - "mov r0, sp\n\t" - "add r1, sp, #32\n\t" - "mov r2, sp\n\t" - "bl fe_mul\n\t" - "add r0, sp, #32\n\t" - "mov r1, sp\n\t" - "bl fe_sq\n\t" - "mov r4, #4\n\t" - "\n" - "L_fe_pow22523_1_%=: \n\t" - "add r0, sp, #32\n\t" - "add r1, sp, #32\n\t" - "bl fe_sq\n\t" - "sub r4, r4, #1\n\t" - "cmp r4, #0\n\t" - "bne L_fe_pow22523_1_%=\n\t" - "mov r0, sp\n\t" - "add r1, sp, #32\n\t" - "mov r2, sp\n\t" - "bl fe_mul\n\t" - "add r0, sp, #32\n\t" - "mov r1, sp\n\t" - "bl fe_sq\n\t" - "mov r4, #9\n\t" - "\n" - "L_fe_pow22523_2_%=: \n\t" - "add r0, sp, #32\n\t" - "add r1, sp, #32\n\t" - "bl fe_sq\n\t" - "sub r4, r4, #1\n\t" - "cmp r4, #0\n\t" - "bne L_fe_pow22523_2_%=\n\t" - "add r0, sp, #32\n\t" - "add r1, sp, #32\n\t" - "mov r2, sp\n\t" - "bl fe_mul\n\t" - "add r0, sp, #0x40\n\t" - "add r1, sp, #32\n\t" - "bl fe_sq\n\t" - "mov r4, #19\n\t" - "\n" - "L_fe_pow22523_3_%=: \n\t" - "add r0, sp, #0x40\n\t" - "add r1, sp, #0x40\n\t" - "bl fe_sq\n\t" - "sub r4, r4, #1\n\t" - "cmp r4, #0\n\t" - "bne L_fe_pow22523_3_%=\n\t" - "add r0, sp, #32\n\t" - "add r1, sp, #0x40\n\t" - "add r2, sp, #32\n\t" - "bl fe_mul\n\t" - "mov r4, #10\n\t" - "\n" - "L_fe_pow22523_4_%=: \n\t" - "add r0, sp, #32\n\t" - "add r1, sp, #32\n\t" - "bl fe_sq\n\t" - "sub r4, r4, #1\n\t" - "cmp r4, #0\n\t" - "bne L_fe_pow22523_4_%=\n\t" - "mov r0, sp\n\t" - "add r1, sp, #32\n\t" - "mov r2, sp\n\t" - "bl fe_mul\n\t" - "add r0, sp, #32\n\t" - "mov r1, sp\n\t" - "bl fe_sq\n\t" - "mov r4, #49\n\t" - "\n" - "L_fe_pow22523_5_%=: \n\t" - "add r0, sp, #32\n\t" - "add r1, sp, #32\n\t" - "bl fe_sq\n\t" - "sub r4, r4, #1\n\t" - "cmp r4, #0\n\t" - "bne L_fe_pow22523_5_%=\n\t" - "add r0, sp, #32\n\t" - "add r1, sp, #32\n\t" - "mov r2, sp\n\t" - "bl fe_mul\n\t" - "add r0, sp, #0x40\n\t" - "add r1, sp, #32\n\t" - "bl fe_sq\n\t" - "mov r4, #0x63\n\t" - "\n" - "L_fe_pow22523_6_%=: \n\t" - "add r0, sp, #0x40\n\t" - "add r1, sp, #0x40\n\t" - "bl fe_sq\n\t" - "sub r4, r4, #1\n\t" - "cmp r4, #0\n\t" - "bne L_fe_pow22523_6_%=\n\t" - "add r0, sp, #32\n\t" - "add r1, sp, #0x40\n\t" - "add r2, sp, #32\n\t" - "bl fe_mul\n\t" - "mov r4, #50\n\t" - "\n" - "L_fe_pow22523_7_%=: \n\t" - "add r0, sp, #32\n\t" - "add r1, sp, #32\n\t" - "bl fe_sq\n\t" - "sub r4, r4, #1\n\t" - "cmp r4, #0\n\t" - "bne L_fe_pow22523_7_%=\n\t" - "mov r0, sp\n\t" - "add r1, sp, #32\n\t" - "mov r2, sp\n\t" - "bl fe_mul\n\t" - "mov r4, #2\n\t" - "\n" - "L_fe_pow22523_8_%=: \n\t" - "mov r0, sp\n\t" - "mov r1, sp\n\t" - "bl fe_sq\n\t" - "sub r4, r4, #1\n\t" - "cmp r4, #0\n\t" - "bne L_fe_pow22523_8_%=\n\t" - "ldr r0, [sp, #96]\n\t" - "mov r1, sp\n\t" - "ldr r2, [sp, #100]\n\t" - "bl fe_mul\n\t" - "ldr %[a], [sp, #100]\n\t" - "ldr %[r], [sp, #96]\n\t" - "add sp, sp, #0x68\n\t" - : [r] "+r" (r), [a] "+r" (a) - : - : "memory", "lr", "r4" - ); -} - -void fe_ge_to_p2(fe rx, fe ry, fe rz, const fe px, const fe py, const fe pz, const fe pt) -{ - __asm__ __volatile__ ( - "sub sp, sp, #16\n\t" - "str %[rx], [sp]\n\t" - "str %[ry], [sp, #4]\n\t" - "str %[rz], [sp, #8]\n\t" - "str %[px], [sp, #12]\n\t" - "ldr r2, [sp, #32]\n\t" - "ldr r1, [sp, #12]\n\t" - "ldr r0, [sp]\n\t" - "bl fe_mul\n\t" - "ldr r2, [sp, #28]\n\t" - "ldr r1, [sp, #24]\n\t" - "ldr r0, [sp, #4]\n\t" - "bl fe_mul\n\t" - "ldr r2, [sp, #32]\n\t" - "ldr r1, [sp, #28]\n\t" - "ldr r0, [sp, #8]\n\t" - "bl fe_mul\n\t" - "add sp, sp, #16\n\t" - : [rx] "+r" (rx), [ry] "+r" (ry), [rz] "+r" (rz), [px] "+r" (px), [py] "+r" (py), [pz] "+r" (pz), [pt] "+r" (pt) - : - : "memory", "lr" - ); -} - -void fe_ge_to_p3(fe rx, fe ry, fe rz, fe rt, const fe px, const fe py, const fe pz, const fe pt) -{ - __asm__ __volatile__ ( - "sub sp, sp, #16\n\t" - "str %[rx], [sp]\n\t" - "str %[ry], [sp, #4]\n\t" - "str %[rz], [sp, #8]\n\t" - "str %[rt], [sp, #12]\n\t" - "ldr r2, [sp, #36]\n\t" - "ldr r1, [sp, #24]\n\t" - "ldr r0, [sp]\n\t" - "bl fe_mul\n\t" - "ldr r2, [sp, #32]\n\t" - "ldr r1, [sp, #28]\n\t" - "ldr r0, [sp, #4]\n\t" - "bl fe_mul\n\t" - "ldr r2, [sp, #36]\n\t" - "ldr r1, [sp, #32]\n\t" - "ldr r0, [sp, #8]\n\t" - "bl fe_mul\n\t" - "ldr r2, [sp, #28]\n\t" - "ldr r1, [sp, #24]\n\t" - "ldr r0, [sp, #12]\n\t" - "bl fe_mul\n\t" - "add sp, sp, #16\n\t" - : [rx] "+r" (rx), [ry] "+r" (ry), [rz] "+r" (rz), [rt] "+r" (rt), [px] "+r" (px), [py] "+r" (py), [pz] "+r" (pz), [pt] "+r" (pt) - : - : "memory", "lr" - ); -} - -void fe_ge_dbl(fe rx, fe ry, fe rz, fe rt, const fe px, const fe py, const fe pz) -{ - __asm__ __volatile__ ( - "sub sp, sp, #16\n\t" - "str %[rx], [sp]\n\t" - "str %[ry], [sp, #4]\n\t" - "str %[rz], [sp, #8]\n\t" - "str %[rt], [sp, #12]\n\t" - "ldr r1, [sp, #88]\n\t" - "ldr r0, [sp]\n\t" - "bl fe_sq\n\t" - "ldr r1, [sp, #92]\n\t" - "ldr r0, [sp, #8]\n\t" - "bl fe_sq\n\t" - "ldr r0, [sp, #4]\n\t" - "ldr r1, [sp, #88]\n\t" - "ldr r2, [sp, #92]\n\t" - /* Add */ - "ldrd %[rt], r4, [r1]\n\t" - "ldrd r5, r6, [r1, #8]\n\t" - "ldrd r7, r8, [r2]\n\t" - "ldrd r9, r10, [r2, #8]\n\t" - "adds r7, %[rt], r7\n\t" - "adcs r8, r4, r8\n\t" - "adcs r9, r5, r9\n\t" - "adcs r10, r6, r10\n\t" - "strd r7, r8, [r0]\n\t" - "strd r9, r10, [r0, #8]\n\t" - "ldrd %[rt], r4, [r1, #16]\n\t" - "ldrd r5, r6, [r1, #24]\n\t" - "ldrd r7, r8, [r2, #16]\n\t" - "ldrd r9, r10, [r2, #24]\n\t" - "adcs r7, %[rt], r7\n\t" - "adcs r8, r4, r8\n\t" - "adcs r9, r5, r9\n\t" - "adc r10, r6, r10\n\t" - "mov r12, #-19\n\t" - "asr r11, r10, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Sub modulus (if overflow) */ - "ldrd %[rt], r4, [r0]\n\t" - "ldrd r5, r6, [r0, #8]\n\t" - "subs %[rt], %[rt], r12\n\t" - "sbcs r4, r4, r11\n\t" - "sbcs r5, r5, r11\n\t" - "sbcs r6, r6, r11\n\t" - "sbcs r7, r7, r11\n\t" - "sbcs r8, r8, r11\n\t" - "sbcs r9, r9, r11\n\t" - "sbc r10, r10, lr\n\t" - "strd %[rt], r4, [r0]\n\t" - "strd r5, r6, [r0, #8]\n\t" - "strd r7, r8, [r0, #16]\n\t" - "strd r9, r10, [r0, #24]\n\t" - "ldr r1, [sp, #4]\n\t" - "ldr r0, [sp, #12]\n\t" - "bl fe_sq\n\t" - "ldr r0, [sp, #4]\n\t" - "ldr r1, [sp, #8]\n\t" - "ldr r2, [sp]\n\t" - /* Add-Sub */ - /* Add */ - "ldrd %[rt], r4, [r1]\n\t" - "ldrd r5, r6, [r2]\n\t" - "adds r7, %[rt], r5\n\t" - "mov r12, #0\n\t" - "adcs r8, r4, r6\n\t" - "adc r12, r12, #0\n\t" - "strd r7, r8, [r0]\n\t" - /* Sub */ - "subs r9, %[rt], r5\n\t" - "mov lr, #0\n\t" - "sbcs r10, r4, r6\n\t" - "adc lr, lr, #0\n\t" - "strd r9, r10, [r1]\n\t" - /* Add */ - "ldrd %[rt], r4, [r1, #8]\n\t" - "ldrd r5, r6, [r2, #8]\n\t" - "adds r12, r12, #-1\n\t" - "adcs r7, %[rt], r5\n\t" - "mov r12, #0\n\t" - "adcs r8, r4, r6\n\t" - "adc r12, r12, #0\n\t" - "strd r7, r8, [r0, #8]\n\t" - /* Sub */ - "adds lr, lr, #-1\n\t" - "sbcs r9, %[rt], r5\n\t" - "mov lr, #0\n\t" - "sbcs r10, r4, r6\n\t" - "adc lr, lr, #0\n\t" - "strd r9, r10, [r1, #8]\n\t" - /* Add */ - "ldrd %[rt], r4, [r1, #16]\n\t" - "ldrd r5, r6, [r2, #16]\n\t" - "adds r12, r12, #-1\n\t" - "adcs r7, %[rt], r5\n\t" - "mov r12, #0\n\t" - "adcs r8, r4, r6\n\t" - "adc r12, r12, #0\n\t" - "strd r7, r8, [r0, #16]\n\t" - /* Sub */ - "adds lr, lr, #-1\n\t" - "sbcs r9, %[rt], r5\n\t" - "mov lr, #0\n\t" - "sbcs r10, r4, r6\n\t" - "adc lr, lr, #0\n\t" - "strd r9, r10, [r1, #16]\n\t" - /* Add */ - "ldrd %[rt], r4, [r1, #24]\n\t" - "ldrd r5, r6, [r2, #24]\n\t" - "adds r12, r12, #-1\n\t" - "adcs r7, %[rt], r5\n\t" - "adc r8, r4, r6\n\t" - /* Sub */ - "adds lr, lr, #-1\n\t" - "sbcs r9, %[rt], r5\n\t" - "sbc r10, r4, r6\n\t" - "mov r12, #-19\n\t" - "asr r11, r8, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Sub modulus (if overflow) */ - "ldrd %[rt], r4, [r0]\n\t" - "subs %[rt], %[rt], r12\n\t" - "sbcs r4, r4, r11\n\t" - "strd %[rt], r4, [r0]\n\t" - "ldrd %[rt], r4, [r0, #8]\n\t" - "sbcs %[rt], %[rt], r11\n\t" - "sbcs r4, r4, r11\n\t" - "strd %[rt], r4, [r0, #8]\n\t" - "ldrd %[rt], r4, [r0, #16]\n\t" - "sbcs %[rt], %[rt], r11\n\t" - "sbcs r4, r4, r11\n\t" - "strd %[rt], r4, [r0, #16]\n\t" - "sbcs r7, r7, r11\n\t" - "sbc r8, r8, lr\n\t" - "strd r7, r8, [r0, #24]\n\t" - "mov r12, #-19\n\t" - "asr r11, r10, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Add modulus (if underflow) */ - "ldrd %[rt], r4, [r1]\n\t" - "adds %[rt], %[rt], r12\n\t" - "adcs r4, r4, r11\n\t" - "strd %[rt], r4, [r1]\n\t" - "ldrd %[rt], r4, [r1, #8]\n\t" - "adcs %[rt], %[rt], r11\n\t" - "adcs r4, r4, r11\n\t" - "strd %[rt], r4, [r1, #8]\n\t" - "ldrd %[rt], r4, [r1, #16]\n\t" - "adcs %[rt], %[rt], r11\n\t" - "adcs r4, r4, r11\n\t" - "strd %[rt], r4, [r1, #16]\n\t" - "adcs r9, r9, r11\n\t" - "adc r10, r10, lr\n\t" - "strd r9, r10, [r1, #24]\n\t" - "ldr r0, [sp]\n\t" - "ldr r1, [sp, #12]\n\t" - "ldr r2, [sp, #4]\n\t" - /* Sub */ - "ldrd %[rt], r4, [r1]\n\t" - "ldrd r5, r6, [r1, #8]\n\t" - "ldrd r7, r8, [r2]\n\t" - "ldrd r9, r10, [r2, #8]\n\t" - "subs r7, %[rt], r7\n\t" - "sbcs r8, r4, r8\n\t" - "sbcs r9, r5, r9\n\t" - "sbcs r10, r6, r10\n\t" - "strd r7, r8, [r0]\n\t" - "strd r9, r10, [r0, #8]\n\t" - "ldrd %[rt], r4, [r1, #16]\n\t" - "ldrd r5, r6, [r1, #24]\n\t" - "ldrd r7, r8, [r2, #16]\n\t" - "ldrd r9, r10, [r2, #24]\n\t" - "sbcs r7, %[rt], r7\n\t" - "sbcs r8, r4, r8\n\t" - "sbcs r9, r5, r9\n\t" - "sbc r10, r6, r10\n\t" - "mov r12, #-19\n\t" - "asr r11, r10, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Add modulus (if underflow) */ - "ldrd %[rt], r4, [r0]\n\t" - "ldrd r5, r6, [r0, #8]\n\t" - "adds %[rt], %[rt], r12\n\t" - "adcs r4, r4, r11\n\t" - "adcs r5, r5, r11\n\t" - "adcs r6, r6, r11\n\t" - "adcs r7, r7, r11\n\t" - "adcs r8, r8, r11\n\t" - "adcs r9, r9, r11\n\t" - "adc r10, r10, lr\n\t" - "strd %[rt], r4, [r0]\n\t" - "strd r5, r6, [r0, #8]\n\t" - "strd r7, r8, [r0, #16]\n\t" - "strd r9, r10, [r0, #24]\n\t" - "ldr r1, [sp, #96]\n\t" - "ldr r0, [sp, #12]\n\t" - "bl fe_sq2\n\t" - "ldr r0, [sp, #12]\n\t" - "ldr r1, [sp, #8]\n\t" - /* Sub */ - "ldrd %[rt], r4, [r0]\n\t" - "ldrd r5, r6, [r0, #8]\n\t" - "ldrd r7, r8, [r1]\n\t" - "ldrd r9, r10, [r1, #8]\n\t" - "subs r7, %[rt], r7\n\t" - "sbcs r8, r4, r8\n\t" - "sbcs r9, r5, r9\n\t" - "sbcs r10, r6, r10\n\t" - "strd r7, r8, [r0]\n\t" - "strd r9, r10, [r0, #8]\n\t" - "ldrd %[rt], r4, [r0, #16]\n\t" - "ldrd r5, r6, [r0, #24]\n\t" - "ldrd r7, r8, [r1, #16]\n\t" - "ldrd r9, r10, [r1, #24]\n\t" - "sbcs r7, %[rt], r7\n\t" - "sbcs r8, r4, r8\n\t" - "sbcs r9, r5, r9\n\t" - "sbc r10, r6, r10\n\t" - "mov r12, #-19\n\t" - "asr r11, r10, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Add modulus (if underflow) */ - "ldrd %[rt], r4, [r0]\n\t" - "ldrd r5, r6, [r0, #8]\n\t" - "adds %[rt], %[rt], r12\n\t" - "adcs r4, r4, r11\n\t" - "adcs r5, r5, r11\n\t" - "adcs r6, r6, r11\n\t" - "adcs r7, r7, r11\n\t" - "adcs r8, r8, r11\n\t" - "adcs r9, r9, r11\n\t" - "adc r10, r10, lr\n\t" - "strd %[rt], r4, [r0]\n\t" - "strd r5, r6, [r0, #8]\n\t" - "strd r7, r8, [r0, #16]\n\t" - "strd r9, r10, [r0, #24]\n\t" - "add sp, sp, #16\n\t" - : [rx] "+r" (rx), [ry] "+r" (ry), [rz] "+r" (rz), [rt] "+r" (rt), [px] "+r" (px), [py] "+r" (py), [pz] "+r" (pz) - : - : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" - ); -} - -void fe_ge_madd(fe rx, fe ry, fe rz, fe rt, const fe px, const fe py, const fe pz, const fe pt, const fe qxy2d, const fe qyplusx, const fe qyminusx) -{ - __asm__ __volatile__ ( - "sub sp, sp, #32\n\t" - "str %[rx], [sp]\n\t" - "str %[ry], [sp, #4]\n\t" - "str %[rz], [sp, #8]\n\t" - "str %[rt], [sp, #12]\n\t" - "ldr r0, [sp]\n\t" - "ldr r1, [sp, #108]\n\t" - "ldr r2, [sp, #104]\n\t" - /* Add */ - "ldrd %[rt], r4, [r1]\n\t" - "ldrd r5, r6, [r1, #8]\n\t" - "ldrd r7, r8, [r2]\n\t" - "ldrd r9, r10, [r2, #8]\n\t" - "adds r7, %[rt], r7\n\t" - "adcs r8, r4, r8\n\t" - "adcs r9, r5, r9\n\t" - "adcs r10, r6, r10\n\t" - "strd r7, r8, [r0]\n\t" - "strd r9, r10, [r0, #8]\n\t" - "ldrd %[rt], r4, [r1, #16]\n\t" - "ldrd r5, r6, [r1, #24]\n\t" - "ldrd r7, r8, [r2, #16]\n\t" - "ldrd r9, r10, [r2, #24]\n\t" - "adcs r7, %[rt], r7\n\t" - "adcs r8, r4, r8\n\t" - "adcs r9, r5, r9\n\t" - "adc r10, r6, r10\n\t" - "mov r12, #-19\n\t" - "asr r11, r10, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Sub modulus (if overflow) */ - "ldrd %[rt], r4, [r0]\n\t" - "ldrd r5, r6, [r0, #8]\n\t" - "subs %[rt], %[rt], r12\n\t" - "sbcs r4, r4, r11\n\t" - "sbcs r5, r5, r11\n\t" - "sbcs r6, r6, r11\n\t" - "sbcs r7, r7, r11\n\t" - "sbcs r8, r8, r11\n\t" - "sbcs r9, r9, r11\n\t" - "sbc r10, r10, lr\n\t" - "strd %[rt], r4, [r0]\n\t" - "strd r5, r6, [r0, #8]\n\t" - "strd r7, r8, [r0, #16]\n\t" - "strd r9, r10, [r0, #24]\n\t" - "ldr r0, [sp, #4]\n\t" - "ldr r1, [sp, #108]\n\t" - "ldr r2, [sp, #104]\n\t" - /* Sub */ - "ldrd %[rt], r4, [r1]\n\t" - "ldrd r5, r6, [r1, #8]\n\t" - "ldrd r7, r8, [r2]\n\t" - "ldrd r9, r10, [r2, #8]\n\t" - "subs r7, %[rt], r7\n\t" - "sbcs r8, r4, r8\n\t" - "sbcs r9, r5, r9\n\t" - "sbcs r10, r6, r10\n\t" - "strd r7, r8, [r0]\n\t" - "strd r9, r10, [r0, #8]\n\t" - "ldrd %[rt], r4, [r1, #16]\n\t" - "ldrd r5, r6, [r1, #24]\n\t" - "ldrd r7, r8, [r2, #16]\n\t" - "ldrd r9, r10, [r2, #24]\n\t" - "sbcs r7, %[rt], r7\n\t" - "sbcs r8, r4, r8\n\t" - "sbcs r9, r5, r9\n\t" - "sbc r10, r6, r10\n\t" - "mov r12, #-19\n\t" - "asr r11, r10, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Add modulus (if underflow) */ - "ldrd %[rt], r4, [r0]\n\t" - "ldrd r5, r6, [r0, #8]\n\t" - "adds %[rt], %[rt], r12\n\t" - "adcs r4, r4, r11\n\t" - "adcs r5, r5, r11\n\t" - "adcs r6, r6, r11\n\t" - "adcs r7, r7, r11\n\t" - "adcs r8, r8, r11\n\t" - "adcs r9, r9, r11\n\t" - "adc r10, r10, lr\n\t" - "strd %[rt], r4, [r0]\n\t" - "strd r5, r6, [r0, #8]\n\t" - "strd r7, r8, [r0, #16]\n\t" - "strd r9, r10, [r0, #24]\n\t" - "ldr r2, [sp, #124]\n\t" - "ldr r1, [sp]\n\t" - "ldr r0, [sp, #8]\n\t" - "bl fe_mul\n\t" - "ldr r2, [sp, #128]\n\t" - "ldr r1, [sp, #4]\n\t" - "ldr r0, [sp, #4]\n\t" - "bl fe_mul\n\t" - "ldr r2, [sp, #116]\n\t" - "ldr r1, [sp, #120]\n\t" - "ldr r0, [sp, #12]\n\t" - "bl fe_mul\n\t" - "ldr r0, [sp, #4]\n\t" - "ldr r1, [sp]\n\t" - "ldr r2, [sp, #8]\n\t" - /* Add-Sub */ - /* Add */ - "ldrd %[rt], r4, [r2]\n\t" - "ldrd r5, r6, [r0]\n\t" - "adds r7, %[rt], r5\n\t" - "mov r12, #0\n\t" - "adcs r8, r4, r6\n\t" - "adc r12, r12, #0\n\t" - "strd r7, r8, [r0]\n\t" - /* Sub */ - "subs r9, %[rt], r5\n\t" - "mov lr, #0\n\t" - "sbcs r10, r4, r6\n\t" - "adc lr, lr, #0\n\t" - "strd r9, r10, [r1]\n\t" - /* Add */ - "ldrd %[rt], r4, [r2, #8]\n\t" - "ldrd r5, r6, [r0, #8]\n\t" - "adds r12, r12, #-1\n\t" - "adcs r7, %[rt], r5\n\t" - "mov r12, #0\n\t" - "adcs r8, r4, r6\n\t" - "adc r12, r12, #0\n\t" - "strd r7, r8, [r0, #8]\n\t" - /* Sub */ - "adds lr, lr, #-1\n\t" - "sbcs r9, %[rt], r5\n\t" - "mov lr, #0\n\t" - "sbcs r10, r4, r6\n\t" - "adc lr, lr, #0\n\t" - "strd r9, r10, [r1, #8]\n\t" - /* Add */ - "ldrd %[rt], r4, [r2, #16]\n\t" - "ldrd r5, r6, [r0, #16]\n\t" - "adds r12, r12, #-1\n\t" - "adcs r7, %[rt], r5\n\t" - "mov r12, #0\n\t" - "adcs r8, r4, r6\n\t" - "adc r12, r12, #0\n\t" - "strd r7, r8, [r0, #16]\n\t" - /* Sub */ - "adds lr, lr, #-1\n\t" - "sbcs r9, %[rt], r5\n\t" - "mov lr, #0\n\t" - "sbcs r10, r4, r6\n\t" - "adc lr, lr, #0\n\t" - "strd r9, r10, [r1, #16]\n\t" - /* Add */ - "ldrd %[rt], r4, [r2, #24]\n\t" - "ldrd r5, r6, [r0, #24]\n\t" - "adds r12, r12, #-1\n\t" - "adcs r7, %[rt], r5\n\t" - "adc r8, r4, r6\n\t" - /* Sub */ - "adds lr, lr, #-1\n\t" - "sbcs r9, %[rt], r5\n\t" - "sbc r10, r4, r6\n\t" - "mov r12, #-19\n\t" - "asr r11, r8, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Sub modulus (if overflow) */ - "ldrd %[rt], r4, [r0]\n\t" - "subs %[rt], %[rt], r12\n\t" - "sbcs r4, r4, r11\n\t" - "strd %[rt], r4, [r0]\n\t" - "ldrd %[rt], r4, [r0, #8]\n\t" - "sbcs %[rt], %[rt], r11\n\t" - "sbcs r4, r4, r11\n\t" - "strd %[rt], r4, [r0, #8]\n\t" - "ldrd %[rt], r4, [r0, #16]\n\t" - "sbcs %[rt], %[rt], r11\n\t" - "sbcs r4, r4, r11\n\t" - "strd %[rt], r4, [r0, #16]\n\t" - "sbcs r7, r7, r11\n\t" - "sbc r8, r8, lr\n\t" - "strd r7, r8, [r0, #24]\n\t" - "mov r12, #-19\n\t" - "asr r11, r10, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Add modulus (if underflow) */ - "ldrd %[rt], r4, [r1]\n\t" - "adds %[rt], %[rt], r12\n\t" - "adcs r4, r4, r11\n\t" - "strd %[rt], r4, [r1]\n\t" - "ldrd %[rt], r4, [r1, #8]\n\t" - "adcs %[rt], %[rt], r11\n\t" - "adcs r4, r4, r11\n\t" - "strd %[rt], r4, [r1, #8]\n\t" - "ldrd %[rt], r4, [r1, #16]\n\t" - "adcs %[rt], %[rt], r11\n\t" - "adcs r4, r4, r11\n\t" - "strd %[rt], r4, [r1, #16]\n\t" - "adcs r9, r9, r11\n\t" - "adc r10, r10, lr\n\t" - "strd r9, r10, [r1, #24]\n\t" - "ldr r0, [sp, #8]\n\t" - "ldr r1, [sp, #112]\n\t" - /* Double */ - "ldrd %[rt], r4, [r1]\n\t" - "ldrd r5, r6, [r1, #8]\n\t" - "ldrd r7, r8, [r1, #16]\n\t" - "ldrd r9, r10, [r1, #24]\n\t" - "adds %[rt], %[rt], %[rt]\n\t" - "adcs r4, r4, r4\n\t" - "adcs r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adcs r7, r7, r7\n\t" - "adcs r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adc r10, r10, r10\n\t" - "mov r12, #-19\n\t" - "asr r11, r10, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Sub modulus (if overflow) */ - "subs %[rt], %[rt], r12\n\t" - "sbcs r4, r4, r11\n\t" - "sbcs r5, r5, r11\n\t" - "sbcs r6, r6, r11\n\t" - "sbcs r7, r7, r11\n\t" - "sbcs r8, r8, r11\n\t" - "sbcs r9, r9, r11\n\t" - "sbc r10, r10, lr\n\t" - "strd %[rt], r4, [r0]\n\t" - "strd r5, r6, [r0, #8]\n\t" - "strd r7, r8, [r0, #16]\n\t" - "strd r9, r10, [r0, #24]\n\t" - "ldr r0, [sp, #8]\n\t" - "ldr r1, [sp, #12]\n\t" - /* Add-Sub */ - /* Add */ - "ldrd %[rt], r4, [r0]\n\t" - "ldrd r5, r6, [r1]\n\t" - "adds r7, %[rt], r5\n\t" - "mov r12, #0\n\t" - "adcs r8, r4, r6\n\t" - "adc r12, r12, #0\n\t" - "strd r7, r8, [r0]\n\t" - /* Sub */ - "subs r9, %[rt], r5\n\t" - "mov lr, #0\n\t" - "sbcs r10, r4, r6\n\t" - "adc lr, lr, #0\n\t" - "strd r9, r10, [r1]\n\t" - /* Add */ - "ldrd %[rt], r4, [r0, #8]\n\t" - "ldrd r5, r6, [r1, #8]\n\t" - "adds r12, r12, #-1\n\t" - "adcs r7, %[rt], r5\n\t" - "mov r12, #0\n\t" - "adcs r8, r4, r6\n\t" - "adc r12, r12, #0\n\t" - "strd r7, r8, [r0, #8]\n\t" - /* Sub */ - "adds lr, lr, #-1\n\t" - "sbcs r9, %[rt], r5\n\t" - "mov lr, #0\n\t" - "sbcs r10, r4, r6\n\t" - "adc lr, lr, #0\n\t" - "strd r9, r10, [r1, #8]\n\t" - /* Add */ - "ldrd %[rt], r4, [r0, #16]\n\t" - "ldrd r5, r6, [r1, #16]\n\t" - "adds r12, r12, #-1\n\t" - "adcs r7, %[rt], r5\n\t" - "mov r12, #0\n\t" - "adcs r8, r4, r6\n\t" - "adc r12, r12, #0\n\t" - "strd r7, r8, [r0, #16]\n\t" - /* Sub */ - "adds lr, lr, #-1\n\t" - "sbcs r9, %[rt], r5\n\t" - "mov lr, #0\n\t" - "sbcs r10, r4, r6\n\t" - "adc lr, lr, #0\n\t" - "strd r9, r10, [r1, #16]\n\t" - /* Add */ - "ldrd %[rt], r4, [r0, #24]\n\t" - "ldrd r5, r6, [r1, #24]\n\t" - "adds r12, r12, #-1\n\t" - "adcs r7, %[rt], r5\n\t" - "adc r8, r4, r6\n\t" - /* Sub */ - "adds lr, lr, #-1\n\t" - "sbcs r9, %[rt], r5\n\t" - "sbc r10, r4, r6\n\t" - "mov r12, #-19\n\t" - "asr r11, r8, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Sub modulus (if overflow) */ - "ldrd %[rt], r4, [r0]\n\t" - "subs %[rt], %[rt], r12\n\t" - "sbcs r4, r4, r11\n\t" - "strd %[rt], r4, [r0]\n\t" - "ldrd %[rt], r4, [r0, #8]\n\t" - "sbcs %[rt], %[rt], r11\n\t" - "sbcs r4, r4, r11\n\t" - "strd %[rt], r4, [r0, #8]\n\t" - "ldrd %[rt], r4, [r0, #16]\n\t" - "sbcs %[rt], %[rt], r11\n\t" - "sbcs r4, r4, r11\n\t" - "strd %[rt], r4, [r0, #16]\n\t" - "sbcs r7, r7, r11\n\t" - "sbc r8, r8, lr\n\t" - "strd r7, r8, [r0, #24]\n\t" - "mov r12, #-19\n\t" - "asr r11, r10, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Add modulus (if underflow) */ - "ldrd %[rt], r4, [r1]\n\t" - "adds %[rt], %[rt], r12\n\t" - "adcs r4, r4, r11\n\t" - "strd %[rt], r4, [r1]\n\t" - "ldrd %[rt], r4, [r1, #8]\n\t" - "adcs %[rt], %[rt], r11\n\t" - "adcs r4, r4, r11\n\t" - "strd %[rt], r4, [r1, #8]\n\t" - "ldrd %[rt], r4, [r1, #16]\n\t" - "adcs %[rt], %[rt], r11\n\t" - "adcs r4, r4, r11\n\t" - "strd %[rt], r4, [r1, #16]\n\t" - "adcs r9, r9, r11\n\t" - "adc r10, r10, lr\n\t" - "strd r9, r10, [r1, #24]\n\t" - "add sp, sp, #32\n\t" - : [rx] "+r" (rx), [ry] "+r" (ry), [rz] "+r" (rz), [rt] "+r" (rt), [px] "+r" (px), [py] "+r" (py), [pz] "+r" (pz), [pt] "+r" (pt) - : - : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" - ); - (void)qxy2d; - (void)qyplusx; - (void)qyminusx; -} - -void fe_ge_msub(fe rx, fe ry, fe rz, fe rt, const fe px, const fe py, const fe pz, const fe pt, const fe qxy2d, const fe qyplusx, const fe qyminusx) -{ - __asm__ __volatile__ ( - "sub sp, sp, #32\n\t" - "str %[rx], [sp]\n\t" - "str %[ry], [sp, #4]\n\t" - "str %[rz], [sp, #8]\n\t" - "str %[rt], [sp, #12]\n\t" - "ldr r0, [sp]\n\t" - "ldr r1, [sp, #108]\n\t" - "ldr r2, [sp, #104]\n\t" - /* Add */ - "ldrd %[rt], r4, [r1]\n\t" - "ldrd r5, r6, [r1, #8]\n\t" - "ldrd r7, r8, [r2]\n\t" - "ldrd r9, r10, [r2, #8]\n\t" - "adds r7, %[rt], r7\n\t" - "adcs r8, r4, r8\n\t" - "adcs r9, r5, r9\n\t" - "adcs r10, r6, r10\n\t" - "strd r7, r8, [r0]\n\t" - "strd r9, r10, [r0, #8]\n\t" - "ldrd %[rt], r4, [r1, #16]\n\t" - "ldrd r5, r6, [r1, #24]\n\t" - "ldrd r7, r8, [r2, #16]\n\t" - "ldrd r9, r10, [r2, #24]\n\t" - "adcs r7, %[rt], r7\n\t" - "adcs r8, r4, r8\n\t" - "adcs r9, r5, r9\n\t" - "adc r10, r6, r10\n\t" - "mov r12, #-19\n\t" - "asr r11, r10, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Sub modulus (if overflow) */ - "ldrd %[rt], r4, [r0]\n\t" - "ldrd r5, r6, [r0, #8]\n\t" - "subs %[rt], %[rt], r12\n\t" - "sbcs r4, r4, r11\n\t" - "sbcs r5, r5, r11\n\t" - "sbcs r6, r6, r11\n\t" - "sbcs r7, r7, r11\n\t" - "sbcs r8, r8, r11\n\t" - "sbcs r9, r9, r11\n\t" - "sbc r10, r10, lr\n\t" - "strd %[rt], r4, [r0]\n\t" - "strd r5, r6, [r0, #8]\n\t" - "strd r7, r8, [r0, #16]\n\t" - "strd r9, r10, [r0, #24]\n\t" - "ldr r0, [sp, #4]\n\t" - "ldr r1, [sp, #108]\n\t" - "ldr r2, [sp, #104]\n\t" - /* Sub */ - "ldrd %[rt], r4, [r1]\n\t" - "ldrd r5, r6, [r1, #8]\n\t" - "ldrd r7, r8, [r2]\n\t" - "ldrd r9, r10, [r2, #8]\n\t" - "subs r7, %[rt], r7\n\t" - "sbcs r8, r4, r8\n\t" - "sbcs r9, r5, r9\n\t" - "sbcs r10, r6, r10\n\t" - "strd r7, r8, [r0]\n\t" - "strd r9, r10, [r0, #8]\n\t" - "ldrd %[rt], r4, [r1, #16]\n\t" - "ldrd r5, r6, [r1, #24]\n\t" - "ldrd r7, r8, [r2, #16]\n\t" - "ldrd r9, r10, [r2, #24]\n\t" - "sbcs r7, %[rt], r7\n\t" - "sbcs r8, r4, r8\n\t" - "sbcs r9, r5, r9\n\t" - "sbc r10, r6, r10\n\t" - "mov r12, #-19\n\t" - "asr r11, r10, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Add modulus (if underflow) */ - "ldrd %[rt], r4, [r0]\n\t" - "ldrd r5, r6, [r0, #8]\n\t" - "adds %[rt], %[rt], r12\n\t" - "adcs r4, r4, r11\n\t" - "adcs r5, r5, r11\n\t" - "adcs r6, r6, r11\n\t" - "adcs r7, r7, r11\n\t" - "adcs r8, r8, r11\n\t" - "adcs r9, r9, r11\n\t" - "adc r10, r10, lr\n\t" - "strd %[rt], r4, [r0]\n\t" - "strd r5, r6, [r0, #8]\n\t" - "strd r7, r8, [r0, #16]\n\t" - "strd r9, r10, [r0, #24]\n\t" - "ldr r2, [sp, #128]\n\t" - "ldr r1, [sp]\n\t" - "ldr r0, [sp, #8]\n\t" - "bl fe_mul\n\t" - "ldr r2, [sp, #124]\n\t" - "ldr r1, [sp, #4]\n\t" - "ldr r0, [sp, #4]\n\t" - "bl fe_mul\n\t" - "ldr r2, [sp, #116]\n\t" - "ldr r1, [sp, #120]\n\t" - "ldr r0, [sp, #12]\n\t" - "bl fe_mul\n\t" - "ldr r0, [sp, #4]\n\t" - "ldr r1, [sp]\n\t" - "ldr r2, [sp, #8]\n\t" - /* Add-Sub */ - /* Add */ - "ldrd %[rt], r4, [r2]\n\t" - "ldrd r5, r6, [r0]\n\t" - "adds r7, %[rt], r5\n\t" - "mov r12, #0\n\t" - "adcs r8, r4, r6\n\t" - "adc r12, r12, #0\n\t" - "strd r7, r8, [r0]\n\t" - /* Sub */ - "subs r9, %[rt], r5\n\t" - "mov lr, #0\n\t" - "sbcs r10, r4, r6\n\t" - "adc lr, lr, #0\n\t" - "strd r9, r10, [r1]\n\t" - /* Add */ - "ldrd %[rt], r4, [r2, #8]\n\t" - "ldrd r5, r6, [r0, #8]\n\t" - "adds r12, r12, #-1\n\t" - "adcs r7, %[rt], r5\n\t" - "mov r12, #0\n\t" - "adcs r8, r4, r6\n\t" - "adc r12, r12, #0\n\t" - "strd r7, r8, [r0, #8]\n\t" - /* Sub */ - "adds lr, lr, #-1\n\t" - "sbcs r9, %[rt], r5\n\t" - "mov lr, #0\n\t" - "sbcs r10, r4, r6\n\t" - "adc lr, lr, #0\n\t" - "strd r9, r10, [r1, #8]\n\t" - /* Add */ - "ldrd %[rt], r4, [r2, #16]\n\t" - "ldrd r5, r6, [r0, #16]\n\t" - "adds r12, r12, #-1\n\t" - "adcs r7, %[rt], r5\n\t" - "mov r12, #0\n\t" - "adcs r8, r4, r6\n\t" - "adc r12, r12, #0\n\t" - "strd r7, r8, [r0, #16]\n\t" - /* Sub */ - "adds lr, lr, #-1\n\t" - "sbcs r9, %[rt], r5\n\t" - "mov lr, #0\n\t" - "sbcs r10, r4, r6\n\t" - "adc lr, lr, #0\n\t" - "strd r9, r10, [r1, #16]\n\t" - /* Add */ - "ldrd %[rt], r4, [r2, #24]\n\t" - "ldrd r5, r6, [r0, #24]\n\t" - "adds r12, r12, #-1\n\t" - "adcs r7, %[rt], r5\n\t" - "adc r8, r4, r6\n\t" - /* Sub */ - "adds lr, lr, #-1\n\t" - "sbcs r9, %[rt], r5\n\t" - "sbc r10, r4, r6\n\t" - "mov r12, #-19\n\t" - "asr r11, r8, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Sub modulus (if overflow) */ - "ldrd %[rt], r4, [r0]\n\t" - "subs %[rt], %[rt], r12\n\t" - "sbcs r4, r4, r11\n\t" - "strd %[rt], r4, [r0]\n\t" - "ldrd %[rt], r4, [r0, #8]\n\t" - "sbcs %[rt], %[rt], r11\n\t" - "sbcs r4, r4, r11\n\t" - "strd %[rt], r4, [r0, #8]\n\t" - "ldrd %[rt], r4, [r0, #16]\n\t" - "sbcs %[rt], %[rt], r11\n\t" - "sbcs r4, r4, r11\n\t" - "strd %[rt], r4, [r0, #16]\n\t" - "sbcs r7, r7, r11\n\t" - "sbc r8, r8, lr\n\t" - "strd r7, r8, [r0, #24]\n\t" - "mov r12, #-19\n\t" - "asr r11, r10, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Add modulus (if underflow) */ - "ldrd %[rt], r4, [r1]\n\t" - "adds %[rt], %[rt], r12\n\t" - "adcs r4, r4, r11\n\t" - "strd %[rt], r4, [r1]\n\t" - "ldrd %[rt], r4, [r1, #8]\n\t" - "adcs %[rt], %[rt], r11\n\t" - "adcs r4, r4, r11\n\t" - "strd %[rt], r4, [r1, #8]\n\t" - "ldrd %[rt], r4, [r1, #16]\n\t" - "adcs %[rt], %[rt], r11\n\t" - "adcs r4, r4, r11\n\t" - "strd %[rt], r4, [r1, #16]\n\t" - "adcs r9, r9, r11\n\t" - "adc r10, r10, lr\n\t" - "strd r9, r10, [r1, #24]\n\t" - "ldr r0, [sp, #8]\n\t" - "ldr r1, [sp, #112]\n\t" - /* Double */ - "ldrd %[rt], r4, [r1]\n\t" - "ldrd r5, r6, [r1, #8]\n\t" - "ldrd r7, r8, [r1, #16]\n\t" - "ldrd r9, r10, [r1, #24]\n\t" - "adds %[rt], %[rt], %[rt]\n\t" - "adcs r4, r4, r4\n\t" - "adcs r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adcs r7, r7, r7\n\t" - "adcs r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adc r10, r10, r10\n\t" - "mov r12, #-19\n\t" - "asr r11, r10, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Sub modulus (if overflow) */ - "subs %[rt], %[rt], r12\n\t" - "sbcs r4, r4, r11\n\t" - "sbcs r5, r5, r11\n\t" - "sbcs r6, r6, r11\n\t" - "sbcs r7, r7, r11\n\t" - "sbcs r8, r8, r11\n\t" - "sbcs r9, r9, r11\n\t" - "sbc r10, r10, lr\n\t" - "strd %[rt], r4, [r0]\n\t" - "strd r5, r6, [r0, #8]\n\t" - "strd r7, r8, [r0, #16]\n\t" - "strd r9, r10, [r0, #24]\n\t" - "ldr r0, [sp, #12]\n\t" - "ldr r1, [sp, #8]\n\t" - /* Add-Sub */ - /* Add */ - "ldrd %[rt], r4, [r1]\n\t" - "ldrd r5, r6, [r0]\n\t" - "adds r7, %[rt], r5\n\t" - "mov r12, #0\n\t" - "adcs r8, r4, r6\n\t" - "adc r12, r12, #0\n\t" - "strd r7, r8, [r0]\n\t" - /* Sub */ - "subs r9, %[rt], r5\n\t" - "mov lr, #0\n\t" - "sbcs r10, r4, r6\n\t" - "adc lr, lr, #0\n\t" - "strd r9, r10, [r1]\n\t" - /* Add */ - "ldrd %[rt], r4, [r1, #8]\n\t" - "ldrd r5, r6, [r0, #8]\n\t" - "adds r12, r12, #-1\n\t" - "adcs r7, %[rt], r5\n\t" - "mov r12, #0\n\t" - "adcs r8, r4, r6\n\t" - "adc r12, r12, #0\n\t" - "strd r7, r8, [r0, #8]\n\t" - /* Sub */ - "adds lr, lr, #-1\n\t" - "sbcs r9, %[rt], r5\n\t" - "mov lr, #0\n\t" - "sbcs r10, r4, r6\n\t" - "adc lr, lr, #0\n\t" - "strd r9, r10, [r1, #8]\n\t" - /* Add */ - "ldrd %[rt], r4, [r1, #16]\n\t" - "ldrd r5, r6, [r0, #16]\n\t" - "adds r12, r12, #-1\n\t" - "adcs r7, %[rt], r5\n\t" - "mov r12, #0\n\t" - "adcs r8, r4, r6\n\t" - "adc r12, r12, #0\n\t" - "strd r7, r8, [r0, #16]\n\t" - /* Sub */ - "adds lr, lr, #-1\n\t" - "sbcs r9, %[rt], r5\n\t" - "mov lr, #0\n\t" - "sbcs r10, r4, r6\n\t" - "adc lr, lr, #0\n\t" - "strd r9, r10, [r1, #16]\n\t" - /* Add */ - "ldrd %[rt], r4, [r1, #24]\n\t" - "ldrd r5, r6, [r0, #24]\n\t" - "adds r12, r12, #-1\n\t" - "adcs r7, %[rt], r5\n\t" - "adc r8, r4, r6\n\t" - /* Sub */ - "adds lr, lr, #-1\n\t" - "sbcs r9, %[rt], r5\n\t" - "sbc r10, r4, r6\n\t" - "mov r12, #-19\n\t" - "asr r11, r8, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Sub modulus (if overflow) */ - "ldrd %[rt], r4, [r0]\n\t" - "subs %[rt], %[rt], r12\n\t" - "sbcs r4, r4, r11\n\t" - "strd %[rt], r4, [r0]\n\t" - "ldrd %[rt], r4, [r0, #8]\n\t" - "sbcs %[rt], %[rt], r11\n\t" - "sbcs r4, r4, r11\n\t" - "strd %[rt], r4, [r0, #8]\n\t" - "ldrd %[rt], r4, [r0, #16]\n\t" - "sbcs %[rt], %[rt], r11\n\t" - "sbcs r4, r4, r11\n\t" - "strd %[rt], r4, [r0, #16]\n\t" - "sbcs r7, r7, r11\n\t" - "sbc r8, r8, lr\n\t" - "strd r7, r8, [r0, #24]\n\t" - "mov r12, #-19\n\t" - "asr r11, r10, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Add modulus (if underflow) */ - "ldrd %[rt], r4, [r1]\n\t" - "adds %[rt], %[rt], r12\n\t" - "adcs r4, r4, r11\n\t" - "strd %[rt], r4, [r1]\n\t" - "ldrd %[rt], r4, [r1, #8]\n\t" - "adcs %[rt], %[rt], r11\n\t" - "adcs r4, r4, r11\n\t" - "strd %[rt], r4, [r1, #8]\n\t" - "ldrd %[rt], r4, [r1, #16]\n\t" - "adcs %[rt], %[rt], r11\n\t" - "adcs r4, r4, r11\n\t" - "strd %[rt], r4, [r1, #16]\n\t" - "adcs r9, r9, r11\n\t" - "adc r10, r10, lr\n\t" - "strd r9, r10, [r1, #24]\n\t" - "add sp, sp, #32\n\t" - : [rx] "+r" (rx), [ry] "+r" (ry), [rz] "+r" (rz), [rt] "+r" (rt), [px] "+r" (px), [py] "+r" (py), [pz] "+r" (pz), [pt] "+r" (pt) - : - : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" - ); - (void)qxy2d; - (void)qyplusx; - (void)qyminusx; -} - -void fe_ge_add(fe rx, fe ry, fe rz, fe rt, const fe px, const fe py, const fe pz, const fe pt, const fe qz, const fe qt2d, const fe qyplusx, const fe qyminusx) -{ - __asm__ __volatile__ ( - "sub sp, sp, #0x60\n\t" - "str %[rx], [sp]\n\t" - "str %[ry], [sp, #4]\n\t" - "str %[rz], [sp, #8]\n\t" - "str %[rt], [sp, #12]\n\t" - "ldr r0, [sp]\n\t" - "ldr r1, [sp, #172]\n\t" - "ldr r2, [sp, #168]\n\t" - /* Add */ - "ldrd %[rt], r4, [r1]\n\t" - "ldrd r5, r6, [r1, #8]\n\t" - "ldrd r7, r8, [r2]\n\t" - "ldrd r9, r10, [r2, #8]\n\t" - "adds r7, %[rt], r7\n\t" - "adcs r8, r4, r8\n\t" - "adcs r9, r5, r9\n\t" - "adcs r10, r6, r10\n\t" - "strd r7, r8, [r0]\n\t" - "strd r9, r10, [r0, #8]\n\t" - "ldrd %[rt], r4, [r1, #16]\n\t" - "ldrd r5, r6, [r1, #24]\n\t" - "ldrd r7, r8, [r2, #16]\n\t" - "ldrd r9, r10, [r2, #24]\n\t" - "adcs r7, %[rt], r7\n\t" - "adcs r8, r4, r8\n\t" - "adcs r9, r5, r9\n\t" - "adc r10, r6, r10\n\t" - "mov r12, #-19\n\t" - "asr r11, r10, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Sub modulus (if overflow) */ - "ldrd %[rt], r4, [r0]\n\t" - "ldrd r5, r6, [r0, #8]\n\t" - "subs %[rt], %[rt], r12\n\t" - "sbcs r4, r4, r11\n\t" - "sbcs r5, r5, r11\n\t" - "sbcs r6, r6, r11\n\t" - "sbcs r7, r7, r11\n\t" - "sbcs r8, r8, r11\n\t" - "sbcs r9, r9, r11\n\t" - "sbc r10, r10, lr\n\t" - "strd %[rt], r4, [r0]\n\t" - "strd r5, r6, [r0, #8]\n\t" - "strd r7, r8, [r0, #16]\n\t" - "strd r9, r10, [r0, #24]\n\t" - "ldr r0, [sp, #4]\n\t" - "ldr r1, [sp, #172]\n\t" - "ldr r2, [sp, #168]\n\t" - /* Sub */ - "ldrd %[rt], r4, [r1]\n\t" - "ldrd r5, r6, [r1, #8]\n\t" - "ldrd r7, r8, [r2]\n\t" - "ldrd r9, r10, [r2, #8]\n\t" - "subs r7, %[rt], r7\n\t" - "sbcs r8, r4, r8\n\t" - "sbcs r9, r5, r9\n\t" - "sbcs r10, r6, r10\n\t" - "strd r7, r8, [r0]\n\t" - "strd r9, r10, [r0, #8]\n\t" - "ldrd %[rt], r4, [r1, #16]\n\t" - "ldrd r5, r6, [r1, #24]\n\t" - "ldrd r7, r8, [r2, #16]\n\t" - "ldrd r9, r10, [r2, #24]\n\t" - "sbcs r7, %[rt], r7\n\t" - "sbcs r8, r4, r8\n\t" - "sbcs r9, r5, r9\n\t" - "sbc r10, r6, r10\n\t" - "mov r12, #-19\n\t" - "asr r11, r10, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Add modulus (if underflow) */ - "ldrd %[rt], r4, [r0]\n\t" - "ldrd r5, r6, [r0, #8]\n\t" - "adds %[rt], %[rt], r12\n\t" - "adcs r4, r4, r11\n\t" - "adcs r5, r5, r11\n\t" - "adcs r6, r6, r11\n\t" - "adcs r7, r7, r11\n\t" - "adcs r8, r8, r11\n\t" - "adcs r9, r9, r11\n\t" - "adc r10, r10, lr\n\t" - "strd %[rt], r4, [r0]\n\t" - "strd r5, r6, [r0, #8]\n\t" - "strd r7, r8, [r0, #16]\n\t" - "strd r9, r10, [r0, #24]\n\t" - "ldr r2, [sp, #192]\n\t" - "ldr r1, [sp]\n\t" - "ldr r0, [sp, #8]\n\t" - "bl fe_mul\n\t" - "ldr r2, [sp, #196]\n\t" - "ldr r1, [sp, #4]\n\t" - "ldr r0, [sp, #4]\n\t" - "bl fe_mul\n\t" - "ldr r2, [sp, #180]\n\t" - "ldr r1, [sp, #188]\n\t" - "ldr r0, [sp, #12]\n\t" - "bl fe_mul\n\t" - "ldr r2, [sp, #184]\n\t" - "ldr r1, [sp, #176]\n\t" - "ldr r0, [sp]\n\t" - "bl fe_mul\n\t" - "add r0, sp, #16\n\t" - "ldr r1, [sp]\n\t" - /* Double */ - "ldrd %[rt], r4, [r1]\n\t" - "ldrd r5, r6, [r1, #8]\n\t" - "ldrd r7, r8, [r1, #16]\n\t" - "ldrd r9, r10, [r1, #24]\n\t" - "adds %[rt], %[rt], %[rt]\n\t" - "adcs r4, r4, r4\n\t" - "adcs r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adcs r7, r7, r7\n\t" - "adcs r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adc r10, r10, r10\n\t" - "mov r12, #-19\n\t" - "asr r11, r10, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Sub modulus (if overflow) */ - "subs %[rt], %[rt], r12\n\t" - "sbcs r4, r4, r11\n\t" - "sbcs r5, r5, r11\n\t" - "sbcs r6, r6, r11\n\t" - "sbcs r7, r7, r11\n\t" - "sbcs r8, r8, r11\n\t" - "sbcs r9, r9, r11\n\t" - "sbc r10, r10, lr\n\t" - "strd %[rt], r4, [r0]\n\t" - "strd r5, r6, [r0, #8]\n\t" - "strd r7, r8, [r0, #16]\n\t" - "strd r9, r10, [r0, #24]\n\t" - "ldr r0, [sp, #4]\n\t" - "ldr r1, [sp]\n\t" - "ldr r2, [sp, #8]\n\t" - /* Add-Sub */ - /* Add */ - "ldrd %[rt], r4, [r2]\n\t" - "ldrd r5, r6, [r0]\n\t" - "adds r7, %[rt], r5\n\t" - "mov r12, #0\n\t" - "adcs r8, r4, r6\n\t" - "adc r12, r12, #0\n\t" - "strd r7, r8, [r0]\n\t" - /* Sub */ - "subs r9, %[rt], r5\n\t" - "mov lr, #0\n\t" - "sbcs r10, r4, r6\n\t" - "adc lr, lr, #0\n\t" - "strd r9, r10, [r1]\n\t" - /* Add */ - "ldrd %[rt], r4, [r2, #8]\n\t" - "ldrd r5, r6, [r0, #8]\n\t" - "adds r12, r12, #-1\n\t" - "adcs r7, %[rt], r5\n\t" - "mov r12, #0\n\t" - "adcs r8, r4, r6\n\t" - "adc r12, r12, #0\n\t" - "strd r7, r8, [r0, #8]\n\t" - /* Sub */ - "adds lr, lr, #-1\n\t" - "sbcs r9, %[rt], r5\n\t" - "mov lr, #0\n\t" - "sbcs r10, r4, r6\n\t" - "adc lr, lr, #0\n\t" - "strd r9, r10, [r1, #8]\n\t" - /* Add */ - "ldrd %[rt], r4, [r2, #16]\n\t" - "ldrd r5, r6, [r0, #16]\n\t" - "adds r12, r12, #-1\n\t" - "adcs r7, %[rt], r5\n\t" - "mov r12, #0\n\t" - "adcs r8, r4, r6\n\t" - "adc r12, r12, #0\n\t" - "strd r7, r8, [r0, #16]\n\t" - /* Sub */ - "adds lr, lr, #-1\n\t" - "sbcs r9, %[rt], r5\n\t" - "mov lr, #0\n\t" - "sbcs r10, r4, r6\n\t" - "adc lr, lr, #0\n\t" - "strd r9, r10, [r1, #16]\n\t" - /* Add */ - "ldrd %[rt], r4, [r2, #24]\n\t" - "ldrd r5, r6, [r0, #24]\n\t" - "adds r12, r12, #-1\n\t" - "adcs r7, %[rt], r5\n\t" - "adc r8, r4, r6\n\t" - /* Sub */ - "adds lr, lr, #-1\n\t" - "sbcs r9, %[rt], r5\n\t" - "sbc r10, r4, r6\n\t" - "mov r12, #-19\n\t" - "asr r11, r8, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Sub modulus (if overflow) */ - "ldrd %[rt], r4, [r0]\n\t" - "subs %[rt], %[rt], r12\n\t" - "sbcs r4, r4, r11\n\t" - "strd %[rt], r4, [r0]\n\t" - "ldrd %[rt], r4, [r0, #8]\n\t" - "sbcs %[rt], %[rt], r11\n\t" - "sbcs r4, r4, r11\n\t" - "strd %[rt], r4, [r0, #8]\n\t" - "ldrd %[rt], r4, [r0, #16]\n\t" - "sbcs %[rt], %[rt], r11\n\t" - "sbcs r4, r4, r11\n\t" - "strd %[rt], r4, [r0, #16]\n\t" - "sbcs r7, r7, r11\n\t" - "sbc r8, r8, lr\n\t" - "strd r7, r8, [r0, #24]\n\t" - "mov r12, #-19\n\t" - "asr r11, r10, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Add modulus (if underflow) */ - "ldrd %[rt], r4, [r1]\n\t" - "adds %[rt], %[rt], r12\n\t" - "adcs r4, r4, r11\n\t" - "strd %[rt], r4, [r1]\n\t" - "ldrd %[rt], r4, [r1, #8]\n\t" - "adcs %[rt], %[rt], r11\n\t" - "adcs r4, r4, r11\n\t" - "strd %[rt], r4, [r1, #8]\n\t" - "ldrd %[rt], r4, [r1, #16]\n\t" - "adcs %[rt], %[rt], r11\n\t" - "adcs r4, r4, r11\n\t" - "strd %[rt], r4, [r1, #16]\n\t" - "adcs r9, r9, r11\n\t" - "adc r10, r10, lr\n\t" - "strd r9, r10, [r1, #24]\n\t" - "ldr r0, [sp, #8]\n\t" - "ldr r1, [sp, #12]\n\t" - "add r2, sp, #16\n\t" - /* Add-Sub */ - /* Add */ - "ldrd %[rt], r4, [r2]\n\t" - "ldrd r5, r6, [r1]\n\t" - "adds r7, %[rt], r5\n\t" - "mov r12, #0\n\t" - "adcs r8, r4, r6\n\t" - "adc r12, r12, #0\n\t" - "strd r7, r8, [r0]\n\t" - /* Sub */ - "subs r9, %[rt], r5\n\t" - "mov lr, #0\n\t" - "sbcs r10, r4, r6\n\t" - "adc lr, lr, #0\n\t" - "strd r9, r10, [r1]\n\t" - /* Add */ - "ldrd %[rt], r4, [r2, #8]\n\t" - "ldrd r5, r6, [r1, #8]\n\t" - "adds r12, r12, #-1\n\t" - "adcs r7, %[rt], r5\n\t" - "mov r12, #0\n\t" - "adcs r8, r4, r6\n\t" - "adc r12, r12, #0\n\t" - "strd r7, r8, [r0, #8]\n\t" - /* Sub */ - "adds lr, lr, #-1\n\t" - "sbcs r9, %[rt], r5\n\t" - "mov lr, #0\n\t" - "sbcs r10, r4, r6\n\t" - "adc lr, lr, #0\n\t" - "strd r9, r10, [r1, #8]\n\t" - /* Add */ - "ldrd %[rt], r4, [r2, #16]\n\t" - "ldrd r5, r6, [r1, #16]\n\t" - "adds r12, r12, #-1\n\t" - "adcs r7, %[rt], r5\n\t" - "mov r12, #0\n\t" - "adcs r8, r4, r6\n\t" - "adc r12, r12, #0\n\t" - "strd r7, r8, [r0, #16]\n\t" - /* Sub */ - "adds lr, lr, #-1\n\t" - "sbcs r9, %[rt], r5\n\t" - "mov lr, #0\n\t" - "sbcs r10, r4, r6\n\t" - "adc lr, lr, #0\n\t" - "strd r9, r10, [r1, #16]\n\t" - /* Add */ - "ldrd %[rt], r4, [r2, #24]\n\t" - "ldrd r5, r6, [r1, #24]\n\t" - "adds r12, r12, #-1\n\t" - "adcs r7, %[rt], r5\n\t" - "adc r8, r4, r6\n\t" - /* Sub */ - "adds lr, lr, #-1\n\t" - "sbcs r9, %[rt], r5\n\t" - "sbc r10, r4, r6\n\t" - "mov r12, #-19\n\t" - "asr r11, r8, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Sub modulus (if overflow) */ - "ldrd %[rt], r4, [r0]\n\t" - "subs %[rt], %[rt], r12\n\t" - "sbcs r4, r4, r11\n\t" - "strd %[rt], r4, [r0]\n\t" - "ldrd %[rt], r4, [r0, #8]\n\t" - "sbcs %[rt], %[rt], r11\n\t" - "sbcs r4, r4, r11\n\t" - "strd %[rt], r4, [r0, #8]\n\t" - "ldrd %[rt], r4, [r0, #16]\n\t" - "sbcs %[rt], %[rt], r11\n\t" - "sbcs r4, r4, r11\n\t" - "strd %[rt], r4, [r0, #16]\n\t" - "sbcs r7, r7, r11\n\t" - "sbc r8, r8, lr\n\t" - "strd r7, r8, [r0, #24]\n\t" - "mov r12, #-19\n\t" - "asr r11, r10, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Add modulus (if underflow) */ - "ldrd %[rt], r4, [r1]\n\t" - "adds %[rt], %[rt], r12\n\t" - "adcs r4, r4, r11\n\t" - "strd %[rt], r4, [r1]\n\t" - "ldrd %[rt], r4, [r1, #8]\n\t" - "adcs %[rt], %[rt], r11\n\t" - "adcs r4, r4, r11\n\t" - "strd %[rt], r4, [r1, #8]\n\t" - "ldrd %[rt], r4, [r1, #16]\n\t" - "adcs %[rt], %[rt], r11\n\t" - "adcs r4, r4, r11\n\t" - "strd %[rt], r4, [r1, #16]\n\t" - "adcs r9, r9, r11\n\t" - "adc r10, r10, lr\n\t" - "strd r9, r10, [r1, #24]\n\t" - "add sp, sp, #0x60\n\t" - : [rx] "+r" (rx), [ry] "+r" (ry), [rz] "+r" (rz), [rt] "+r" (rt), [px] "+r" (px), [py] "+r" (py), [pz] "+r" (pz), [pt] "+r" (pt) - : - : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" - ); - (void)qz; - (void)qt2d; - (void)qyplusx; - (void)qyminusx; -} - -void fe_ge_sub(fe rx, fe ry, fe rz, fe rt, const fe px, const fe py, const fe pz, const fe pt, const fe qz, const fe qt2d, const fe qyplusx, const fe qyminusx) -{ - __asm__ __volatile__ ( - "sub sp, sp, #0x60\n\t" - "str %[rx], [sp]\n\t" - "str %[ry], [sp, #4]\n\t" - "str %[rz], [sp, #8]\n\t" - "str %[rt], [sp, #12]\n\t" - "ldr r0, [sp]\n\t" - "ldr r1, [sp, #172]\n\t" - "ldr r2, [sp, #168]\n\t" - /* Add */ - "ldrd %[rt], r4, [r1]\n\t" - "ldrd r5, r6, [r1, #8]\n\t" - "ldrd r7, r8, [r2]\n\t" - "ldrd r9, r10, [r2, #8]\n\t" - "adds r7, %[rt], r7\n\t" - "adcs r8, r4, r8\n\t" - "adcs r9, r5, r9\n\t" - "adcs r10, r6, r10\n\t" - "strd r7, r8, [r0]\n\t" - "strd r9, r10, [r0, #8]\n\t" - "ldrd %[rt], r4, [r1, #16]\n\t" - "ldrd r5, r6, [r1, #24]\n\t" - "ldrd r7, r8, [r2, #16]\n\t" - "ldrd r9, r10, [r2, #24]\n\t" - "adcs r7, %[rt], r7\n\t" - "adcs r8, r4, r8\n\t" - "adcs r9, r5, r9\n\t" - "adc r10, r6, r10\n\t" - "mov r12, #-19\n\t" - "asr r11, r10, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Sub modulus (if overflow) */ - "ldrd %[rt], r4, [r0]\n\t" - "ldrd r5, r6, [r0, #8]\n\t" - "subs %[rt], %[rt], r12\n\t" - "sbcs r4, r4, r11\n\t" - "sbcs r5, r5, r11\n\t" - "sbcs r6, r6, r11\n\t" - "sbcs r7, r7, r11\n\t" - "sbcs r8, r8, r11\n\t" - "sbcs r9, r9, r11\n\t" - "sbc r10, r10, lr\n\t" - "strd %[rt], r4, [r0]\n\t" - "strd r5, r6, [r0, #8]\n\t" - "strd r7, r8, [r0, #16]\n\t" - "strd r9, r10, [r0, #24]\n\t" - "ldr r0, [sp, #4]\n\t" - "ldr r1, [sp, #172]\n\t" - "ldr r2, [sp, #168]\n\t" - /* Sub */ - "ldrd %[rt], r4, [r1]\n\t" - "ldrd r5, r6, [r1, #8]\n\t" - "ldrd r7, r8, [r2]\n\t" - "ldrd r9, r10, [r2, #8]\n\t" - "subs r7, %[rt], r7\n\t" - "sbcs r8, r4, r8\n\t" - "sbcs r9, r5, r9\n\t" - "sbcs r10, r6, r10\n\t" - "strd r7, r8, [r0]\n\t" - "strd r9, r10, [r0, #8]\n\t" - "ldrd %[rt], r4, [r1, #16]\n\t" - "ldrd r5, r6, [r1, #24]\n\t" - "ldrd r7, r8, [r2, #16]\n\t" - "ldrd r9, r10, [r2, #24]\n\t" - "sbcs r7, %[rt], r7\n\t" - "sbcs r8, r4, r8\n\t" - "sbcs r9, r5, r9\n\t" - "sbc r10, r6, r10\n\t" - "mov r12, #-19\n\t" - "asr r11, r10, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Add modulus (if underflow) */ - "ldrd %[rt], r4, [r0]\n\t" - "ldrd r5, r6, [r0, #8]\n\t" - "adds %[rt], %[rt], r12\n\t" - "adcs r4, r4, r11\n\t" - "adcs r5, r5, r11\n\t" - "adcs r6, r6, r11\n\t" - "adcs r7, r7, r11\n\t" - "adcs r8, r8, r11\n\t" - "adcs r9, r9, r11\n\t" - "adc r10, r10, lr\n\t" - "strd %[rt], r4, [r0]\n\t" - "strd r5, r6, [r0, #8]\n\t" - "strd r7, r8, [r0, #16]\n\t" - "strd r9, r10, [r0, #24]\n\t" - "ldr r2, [sp, #196]\n\t" - "ldr r1, [sp]\n\t" - "ldr r0, [sp, #8]\n\t" - "bl fe_mul\n\t" - "ldr r2, [sp, #192]\n\t" - "ldr r1, [sp, #4]\n\t" - "ldr r0, [sp, #4]\n\t" - "bl fe_mul\n\t" - "ldr r2, [sp, #180]\n\t" - "ldr r1, [sp, #188]\n\t" - "ldr r0, [sp, #12]\n\t" - "bl fe_mul\n\t" - "ldr r2, [sp, #184]\n\t" - "ldr r1, [sp, #176]\n\t" - "ldr r0, [sp]\n\t" - "bl fe_mul\n\t" - "add r0, sp, #16\n\t" - "ldr r1, [sp]\n\t" - /* Double */ - "ldrd %[rt], r4, [r1]\n\t" - "ldrd r5, r6, [r1, #8]\n\t" - "ldrd r7, r8, [r1, #16]\n\t" - "ldrd r9, r10, [r1, #24]\n\t" - "adds %[rt], %[rt], %[rt]\n\t" - "adcs r4, r4, r4\n\t" - "adcs r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adcs r7, r7, r7\n\t" - "adcs r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adc r10, r10, r10\n\t" - "mov r12, #-19\n\t" - "asr r11, r10, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Sub modulus (if overflow) */ - "subs %[rt], %[rt], r12\n\t" - "sbcs r4, r4, r11\n\t" - "sbcs r5, r5, r11\n\t" - "sbcs r6, r6, r11\n\t" - "sbcs r7, r7, r11\n\t" - "sbcs r8, r8, r11\n\t" - "sbcs r9, r9, r11\n\t" - "sbc r10, r10, lr\n\t" - "strd %[rt], r4, [r0]\n\t" - "strd r5, r6, [r0, #8]\n\t" - "strd r7, r8, [r0, #16]\n\t" - "strd r9, r10, [r0, #24]\n\t" - "ldr r0, [sp, #4]\n\t" - "ldr r1, [sp]\n\t" - "ldr r2, [sp, #8]\n\t" - /* Add-Sub */ - /* Add */ - "ldrd %[rt], r4, [r2]\n\t" - "ldrd r5, r6, [r0]\n\t" - "adds r7, %[rt], r5\n\t" - "mov r12, #0\n\t" - "adcs r8, r4, r6\n\t" - "adc r12, r12, #0\n\t" - "strd r7, r8, [r0]\n\t" - /* Sub */ - "subs r9, %[rt], r5\n\t" - "mov lr, #0\n\t" - "sbcs r10, r4, r6\n\t" - "adc lr, lr, #0\n\t" - "strd r9, r10, [r1]\n\t" - /* Add */ - "ldrd %[rt], r4, [r2, #8]\n\t" - "ldrd r5, r6, [r0, #8]\n\t" - "adds r12, r12, #-1\n\t" - "adcs r7, %[rt], r5\n\t" - "mov r12, #0\n\t" - "adcs r8, r4, r6\n\t" - "adc r12, r12, #0\n\t" - "strd r7, r8, [r0, #8]\n\t" - /* Sub */ - "adds lr, lr, #-1\n\t" - "sbcs r9, %[rt], r5\n\t" - "mov lr, #0\n\t" - "sbcs r10, r4, r6\n\t" - "adc lr, lr, #0\n\t" - "strd r9, r10, [r1, #8]\n\t" - /* Add */ - "ldrd %[rt], r4, [r2, #16]\n\t" - "ldrd r5, r6, [r0, #16]\n\t" - "adds r12, r12, #-1\n\t" - "adcs r7, %[rt], r5\n\t" - "mov r12, #0\n\t" - "adcs r8, r4, r6\n\t" - "adc r12, r12, #0\n\t" - "strd r7, r8, [r0, #16]\n\t" - /* Sub */ - "adds lr, lr, #-1\n\t" - "sbcs r9, %[rt], r5\n\t" - "mov lr, #0\n\t" - "sbcs r10, r4, r6\n\t" - "adc lr, lr, #0\n\t" - "strd r9, r10, [r1, #16]\n\t" - /* Add */ - "ldrd %[rt], r4, [r2, #24]\n\t" - "ldrd r5, r6, [r0, #24]\n\t" - "adds r12, r12, #-1\n\t" - "adcs r7, %[rt], r5\n\t" - "adc r8, r4, r6\n\t" - /* Sub */ - "adds lr, lr, #-1\n\t" - "sbcs r9, %[rt], r5\n\t" - "sbc r10, r4, r6\n\t" - "mov r12, #-19\n\t" - "asr r11, r8, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Sub modulus (if overflow) */ - "ldrd %[rt], r4, [r0]\n\t" - "subs %[rt], %[rt], r12\n\t" - "sbcs r4, r4, r11\n\t" - "strd %[rt], r4, [r0]\n\t" - "ldrd %[rt], r4, [r0, #8]\n\t" - "sbcs %[rt], %[rt], r11\n\t" - "sbcs r4, r4, r11\n\t" - "strd %[rt], r4, [r0, #8]\n\t" - "ldrd %[rt], r4, [r0, #16]\n\t" - "sbcs %[rt], %[rt], r11\n\t" - "sbcs r4, r4, r11\n\t" - "strd %[rt], r4, [r0, #16]\n\t" - "sbcs r7, r7, r11\n\t" - "sbc r8, r8, lr\n\t" - "strd r7, r8, [r0, #24]\n\t" - "mov r12, #-19\n\t" - "asr r11, r10, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Add modulus (if underflow) */ - "ldrd %[rt], r4, [r1]\n\t" - "adds %[rt], %[rt], r12\n\t" - "adcs r4, r4, r11\n\t" - "strd %[rt], r4, [r1]\n\t" - "ldrd %[rt], r4, [r1, #8]\n\t" - "adcs %[rt], %[rt], r11\n\t" - "adcs r4, r4, r11\n\t" - "strd %[rt], r4, [r1, #8]\n\t" - "ldrd %[rt], r4, [r1, #16]\n\t" - "adcs %[rt], %[rt], r11\n\t" - "adcs r4, r4, r11\n\t" - "strd %[rt], r4, [r1, #16]\n\t" - "adcs r9, r9, r11\n\t" - "adc r10, r10, lr\n\t" - "strd r9, r10, [r1, #24]\n\t" - "ldr r0, [sp, #12]\n\t" - "ldr r1, [sp, #8]\n\t" - "add r2, sp, #16\n\t" - /* Add-Sub */ - /* Add */ - "ldrd %[rt], r4, [r2]\n\t" - "ldrd r5, r6, [r0]\n\t" - "adds r7, %[rt], r5\n\t" - "mov r12, #0\n\t" - "adcs r8, r4, r6\n\t" - "adc r12, r12, #0\n\t" - "strd r7, r8, [r0]\n\t" - /* Sub */ - "subs r9, %[rt], r5\n\t" - "mov lr, #0\n\t" - "sbcs r10, r4, r6\n\t" - "adc lr, lr, #0\n\t" - "strd r9, r10, [r1]\n\t" - /* Add */ - "ldrd %[rt], r4, [r2, #8]\n\t" - "ldrd r5, r6, [r0, #8]\n\t" - "adds r12, r12, #-1\n\t" - "adcs r7, %[rt], r5\n\t" - "mov r12, #0\n\t" - "adcs r8, r4, r6\n\t" - "adc r12, r12, #0\n\t" - "strd r7, r8, [r0, #8]\n\t" - /* Sub */ - "adds lr, lr, #-1\n\t" - "sbcs r9, %[rt], r5\n\t" - "mov lr, #0\n\t" - "sbcs r10, r4, r6\n\t" - "adc lr, lr, #0\n\t" - "strd r9, r10, [r1, #8]\n\t" - /* Add */ - "ldrd %[rt], r4, [r2, #16]\n\t" - "ldrd r5, r6, [r0, #16]\n\t" - "adds r12, r12, #-1\n\t" - "adcs r7, %[rt], r5\n\t" - "mov r12, #0\n\t" - "adcs r8, r4, r6\n\t" - "adc r12, r12, #0\n\t" - "strd r7, r8, [r0, #16]\n\t" - /* Sub */ - "adds lr, lr, #-1\n\t" - "sbcs r9, %[rt], r5\n\t" - "mov lr, #0\n\t" - "sbcs r10, r4, r6\n\t" - "adc lr, lr, #0\n\t" - "strd r9, r10, [r1, #16]\n\t" - /* Add */ - "ldrd %[rt], r4, [r2, #24]\n\t" - "ldrd r5, r6, [r0, #24]\n\t" - "adds r12, r12, #-1\n\t" - "adcs r7, %[rt], r5\n\t" - "adc r8, r4, r6\n\t" - /* Sub */ - "adds lr, lr, #-1\n\t" - "sbcs r9, %[rt], r5\n\t" - "sbc r10, r4, r6\n\t" - "mov r12, #-19\n\t" - "asr r11, r8, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Sub modulus (if overflow) */ - "ldrd %[rt], r4, [r0]\n\t" - "subs %[rt], %[rt], r12\n\t" - "sbcs r4, r4, r11\n\t" - "strd %[rt], r4, [r0]\n\t" - "ldrd %[rt], r4, [r0, #8]\n\t" - "sbcs %[rt], %[rt], r11\n\t" - "sbcs r4, r4, r11\n\t" - "strd %[rt], r4, [r0, #8]\n\t" - "ldrd %[rt], r4, [r0, #16]\n\t" - "sbcs %[rt], %[rt], r11\n\t" - "sbcs r4, r4, r11\n\t" - "strd %[rt], r4, [r0, #16]\n\t" - "sbcs r7, r7, r11\n\t" - "sbc r8, r8, lr\n\t" - "strd r7, r8, [r0, #24]\n\t" - "mov r12, #-19\n\t" - "asr r11, r10, #31\n\t" - /* Mask the modulus */ - "and r12, r11, r12\n\t" - "and lr, r11, #0x7fffffff\n\t" - /* Add modulus (if underflow) */ - "ldrd %[rt], r4, [r1]\n\t" - "adds %[rt], %[rt], r12\n\t" - "adcs r4, r4, r11\n\t" - "strd %[rt], r4, [r1]\n\t" - "ldrd %[rt], r4, [r1, #8]\n\t" - "adcs %[rt], %[rt], r11\n\t" - "adcs r4, r4, r11\n\t" - "strd %[rt], r4, [r1, #8]\n\t" - "ldrd %[rt], r4, [r1, #16]\n\t" - "adcs %[rt], %[rt], r11\n\t" - "adcs r4, r4, r11\n\t" - "strd %[rt], r4, [r1, #16]\n\t" - "adcs r9, r9, r11\n\t" - "adc r10, r10, lr\n\t" - "strd r9, r10, [r1, #24]\n\t" - "add sp, sp, #0x60\n\t" - : [rx] "+r" (rx), [ry] "+r" (ry), [rz] "+r" (rz), [rt] "+r" (rt), [px] "+r" (px), [py] "+r" (py), [pz] "+r" (pz), [pt] "+r" (pt) - : - : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" - ); - (void)qz; - (void)qt2d; - (void)qyplusx; - (void)qyminusx; -} - -#endif /* WOLFSSL_ARMASM */ -#endif /* !__aarch64__ */ |