aboutsummaryrefslogtreecommitdiff
path: root/client/wolfssl/wolfcrypt/src/port/arm/armv8-sha512-asm.c
diff options
context:
space:
mode:
Diffstat (limited to 'client/wolfssl/wolfcrypt/src/port/arm/armv8-sha512-asm.c')
-rw-r--r--  client/wolfssl/wolfcrypt/src/port/arm/armv8-sha512-asm.c | 1041
1 files changed, 1041 insertions, 0 deletions
diff --git a/client/wolfssl/wolfcrypt/src/port/arm/armv8-sha512-asm.c b/client/wolfssl/wolfcrypt/src/port/arm/armv8-sha512-asm.c
new file mode 100644
index 0000000..d323598
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/port/arm/armv8-sha512-asm.c
@@ -0,0 +1,1041 @@
+/* armv8-sha512-asm
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Generated using (from wolfssl):
+ * cd ../scripts
+ * ruby ./sha2/sha512.rb arm64 ../wolfssl/wolfcrypt/src/port/arm/armv8-sha512-asm.c
+ */
+#ifdef __aarch64__
+#include <stdint.h>
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef WOLFSSL_ARMASM
+#include <wolfssl/wolfcrypt/sha512.h>
+
+/* SHA-512 round constants K[0..79] (FIPS 180-4, sec. 4.2.3): the first 64
+ * bits of the fractional parts of the cube roots of the first 80 primes.
+ * Consumed sequentially, one 8-byte constant per round, via the
+ * post-indexed "ldr x15, [x3], #8" loads in Transform_Sha512_Len. */
+static const uint64_t L_SHA512_transform_neon_len_k[] = {
+    0x428a2f98d728ae22UL,
+    0x7137449123ef65cdUL,
+    0xb5c0fbcfec4d3b2fUL,
+    0xe9b5dba58189dbbcUL,
+    0x3956c25bf348b538UL,
+    0x59f111f1b605d019UL,
+    0x923f82a4af194f9bUL,
+    0xab1c5ed5da6d8118UL,
+    0xd807aa98a3030242UL,
+    0x12835b0145706fbeUL,
+    0x243185be4ee4b28cUL,
+    0x550c7dc3d5ffb4e2UL,
+    0x72be5d74f27b896fUL,
+    0x80deb1fe3b1696b1UL,
+    0x9bdc06a725c71235UL,
+    0xc19bf174cf692694UL,
+    0xe49b69c19ef14ad2UL,
+    0xefbe4786384f25e3UL,
+    0xfc19dc68b8cd5b5UL,
+    0x240ca1cc77ac9c65UL,
+    0x2de92c6f592b0275UL,
+    0x4a7484aa6ea6e483UL,
+    0x5cb0a9dcbd41fbd4UL,
+    0x76f988da831153b5UL,
+    0x983e5152ee66dfabUL,
+    0xa831c66d2db43210UL,
+    0xb00327c898fb213fUL,
+    0xbf597fc7beef0ee4UL,
+    0xc6e00bf33da88fc2UL,
+    0xd5a79147930aa725UL,
+    0x6ca6351e003826fUL,
+    0x142929670a0e6e70UL,
+    0x27b70a8546d22ffcUL,
+    0x2e1b21385c26c926UL,
+    0x4d2c6dfc5ac42aedUL,
+    0x53380d139d95b3dfUL,
+    0x650a73548baf63deUL,
+    0x766a0abb3c77b2a8UL,
+    0x81c2c92e47edaee6UL,
+    0x92722c851482353bUL,
+    0xa2bfe8a14cf10364UL,
+    0xa81a664bbc423001UL,
+    0xc24b8b70d0f89791UL,
+    0xc76c51a30654be30UL,
+    0xd192e819d6ef5218UL,
+    0xd69906245565a910UL,
+    0xf40e35855771202aUL,
+    0x106aa07032bbd1b8UL,
+    0x19a4c116b8d2d0c8UL,
+    0x1e376c085141ab53UL,
+    0x2748774cdf8eeb99UL,
+    0x34b0bcb5e19b48a8UL,
+    0x391c0cb3c5c95a63UL,
+    0x4ed8aa4ae3418acbUL,
+    0x5b9cca4f7763e373UL,
+    0x682e6ff3d6b2b8a3UL,
+    0x748f82ee5defb2fcUL,
+    0x78a5636f43172f60UL,
+    0x84c87814a1f0ab72UL,
+    0x8cc702081a6439ecUL,
+    0x90befffa23631e28UL,
+    0xa4506cebde82bde9UL,
+    0xbef9a3f7b2c67915UL,
+    0xc67178f2e372532bUL,
+    0xca273eceea26619cUL,
+    0xd186b8c721c0c207UL,
+    0xeada7dd6cde0eb1eUL,
+    0xf57d4f7fee6ed178UL,
+    0x6f067aa72176fbaUL,
+    0xa637dc5a2c898a6UL,
+    0x113f9804bef90daeUL,
+    0x1b710b35131c471bUL,
+    0x28db77f523047d84UL,
+    0x32caab7b40c72493UL,
+    0x3c9ebe0a15c9bebcUL,
+    0x431d67c49c100d4cUL,
+    0x4cc5d4becb3e42b6UL,
+    0x597f299cfc657e2aUL,
+    0x5fcb6fab3ad6faecUL,
+    0x6c44198c4a475817UL,
+};
+
+/* Byte-index table for the NEON "tbl" instruction: entry i selects source
+ * byte (i + 1) mod 8 within its own 64-bit lane, i.e. a rotate-right by 8
+ * bits of each lane.  Loaded once into v11 and used in the message-schedule
+ * sigma0 computation ("tbl v9.16b, {v10.16b}, v11.16b"). */
+static const uint64_t L_SHA512_transform_neon_len_ror8[] = {
+    0x7060504030201UL,
+    0x80f0e0d0c0b0a09UL,
+};
+
+/* NEON/AArch64 SHA-512 block transform (machine-generated -- do not edit
+ * by hand; regenerate with the ruby script named in the file header).
+ *
+ * Compresses full 128-byte blocks from data into the hash state.  len must
+ * be a non-zero multiple of 128: the outer loop subtracts 0x80 per block
+ * and repeats until len reaches exactly zero.
+ *
+ * sha512  [in/out]  hash state; the eight 64-bit digest words are read
+ *                   from (and written back to) the start of the struct
+ *                   with ldp/stp pairs.
+ * data    [in]      message block(s), 128 * n bytes.
+ * len     [in]      byte count to process.
+ *
+ * Register allocation inside the asm:
+ *   x3        running pointer into the K constant table
+ *   x4..x11   working variables a..h
+ *   x12..x17  round scratch (Sigma0/Sigma1, Ch, Maj)
+ *   x19..x26  digest as it entered the block (added back at the end)
+ *   x27       first addresses the ror8 table, then counts the 4 groups of
+ *             16 rounds that interleave the message-schedule update
+ *   v0..v7    the sixteen 64-bit message-schedule words W[0..15]
+ *   v8..v10   schedule scratch
+ *   v11       tbl permutation giving ror-8 of each 64-bit lane
+ */
+void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
+{
+    __asm__ __volatile__ (
+        "stp x29, x30, [sp, #-16]!\n\t"
+        "add x29, sp, #0\n\t"
+        "adr x3, %[L_SHA512_transform_neon_len_k]\n\t"
+        "adr x27, %[L_SHA512_transform_neon_len_ror8]\n\t"
+        "ld1 {v11.16b}, [x27]\n\t"
+        /* Load digest into working vars */
+        "ldp x4, x5, [%x[sha512]]\n\t"
+        "ldp x6, x7, [%x[sha512], #16]\n\t"
+        "ldp x8, x9, [%x[sha512], #32]\n\t"
+        "ldp x10, x11, [%x[sha512], #48]\n\t"
+        /* Start of loop processing a block */
+        "\n"
+    "L_sha512_len_neon_begin_%=: \n\t"
+        /* Load W */
+        /* Copy digest to add in at end */
+        "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[data]], #0x40\n\t"
+        "mov x19, x4\n\t"
+        "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[data]], #0x40\n\t"
+        "mov x20, x5\n\t"
+        "rev64 v0.16b, v0.16b\n\t"
+        "mov x21, x6\n\t"
+        "rev64 v1.16b, v1.16b\n\t"
+        "mov x22, x7\n\t"
+        "rev64 v2.16b, v2.16b\n\t"
+        "mov x23, x8\n\t"
+        "rev64 v3.16b, v3.16b\n\t"
+        "mov x24, x9\n\t"
+        "rev64 v4.16b, v4.16b\n\t"
+        "mov x25, x10\n\t"
+        "rev64 v5.16b, v5.16b\n\t"
+        "mov x26, x11\n\t"
+        "rev64 v6.16b, v6.16b\n\t"
+        "rev64 v7.16b, v7.16b\n\t"
+        /* Pre-calc: b ^ c */
+        "eor x16, x5, x6\n\t"
+        "mov x27, #4\n\t"
+        /* Start of 16 rounds */
+        /* (4 groups of 16 rounds; odd rounds also advance W in v0..v7) */
+        "\n"
+    "L_sha512_len_neon_start_%=: \n\t"
+        /* Round 0 */
+        "mov x13, v0.d[0]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ror x12, x8, #14\n\t"
+        "ror x14, x4, #28\n\t"
+        "eor x12, x12, x8, ror 18\n\t"
+        "eor x14, x14, x4, ror 34\n\t"
+        "eor x12, x12, x8, ror 41\n\t"
+        "eor x14, x14, x4, ror 39\n\t"
+        "add x11, x11, x12\n\t"
+        "eor x17, x4, x5\n\t"
+        "eor x12, x9, x10\n\t"
+        "and x16, x17, x16\n\t"
+        "and x12, x12, x8\n\t"
+        "add x11, x11, x13\n\t"
+        "eor x12, x12, x10\n\t"
+        "add x11, x11, x15\n\t"
+        "eor x16, x16, x5\n\t"
+        "add x11, x11, x12\n\t"
+        "add x14, x14, x16\n\t"
+        "add x7, x7, x11\n\t"
+        "add x11, x11, x14\n\t"
+        /* Round 1 */
+        "mov x13, v0.d[1]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ext v10.16b, v0.16b, v1.16b, #8\n\t"
+        "ror x12, x7, #14\n\t"
+        "shl v8.2d, v7.2d, #45\n\t"
+        "ror x14, x11, #28\n\t"
+        "sri v8.2d, v7.2d, #19\n\t"
+        "eor x12, x12, x7, ror 18\n\t"
+        "shl v9.2d, v7.2d, #3\n\t"
+        "eor x14, x14, x11, ror 34\n\t"
+        "sri v9.2d, v7.2d, #61\n\t"
+        "eor x12, x12, x7, ror 41\n\t"
+        "eor v9.16b, v9.16b, v8.16b\n\t"
+        "eor x14, x14, x11, ror 39\n\t"
+        "ushr v8.2d, v7.2d, #6\n\t"
+        "add x10, x10, x12\n\t"
+        "eor v9.16b, v9.16b, v8.16b\n\t"
+        "eor x16, x11, x4\n\t"
+        "add v0.2d, v0.2d, v9.2d\n\t"
+        "eor x12, x8, x9\n\t"
+        "ext v9.16b, v4.16b, v5.16b, #8\n\t"
+        "and x17, x16, x17\n\t"
+        "add v0.2d, v0.2d, v9.2d\n\t"
+        "and x12, x12, x7\n\t"
+        "shl v8.2d, v10.2d, #63\n\t"
+        "add x10, x10, x13\n\t"
+        "sri v8.2d, v10.2d, #1\n\t"
+        "eor x12, x12, x9\n\t"
+        "tbl v9.16b, {v10.16b}, v11.16b\n\t"
+        "add x10, x10, x15\n\t"
+        "eor v9.16b, v9.16b, v8.16b\n\t"
+        "eor x17, x17, x4\n\t"
+        "ushr v10.2d, v10.2d, #7\n\t"
+        "add x10, x10, x12\n\t"
+        "eor v9.16b, v9.16b, v10.16b\n\t"
+        "add x14, x14, x17\n\t"
+        "add v0.2d, v0.2d, v9.2d\n\t"
+        "add x6, x6, x10\n\t"
+        "add x10, x10, x14\n\t"
+        /* Round 2 */
+        "mov x13, v1.d[0]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ror x12, x6, #14\n\t"
+        "ror x14, x10, #28\n\t"
+        "eor x12, x12, x6, ror 18\n\t"
+        "eor x14, x14, x10, ror 34\n\t"
+        "eor x12, x12, x6, ror 41\n\t"
+        "eor x14, x14, x10, ror 39\n\t"
+        "add x9, x9, x12\n\t"
+        "eor x17, x10, x11\n\t"
+        "eor x12, x7, x8\n\t"
+        "and x16, x17, x16\n\t"
+        "and x12, x12, x6\n\t"
+        "add x9, x9, x13\n\t"
+        "eor x12, x12, x8\n\t"
+        "add x9, x9, x15\n\t"
+        "eor x16, x16, x11\n\t"
+        "add x9, x9, x12\n\t"
+        "add x14, x14, x16\n\t"
+        "add x5, x5, x9\n\t"
+        "add x9, x9, x14\n\t"
+        /* Round 3 */
+        "mov x13, v1.d[1]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ext v10.16b, v1.16b, v2.16b, #8\n\t"
+        "ror x12, x5, #14\n\t"
+        "shl v8.2d, v0.2d, #45\n\t"
+        "ror x14, x9, #28\n\t"
+        "sri v8.2d, v0.2d, #19\n\t"
+        "eor x12, x12, x5, ror 18\n\t"
+        "shl v9.2d, v0.2d, #3\n\t"
+        "eor x14, x14, x9, ror 34\n\t"
+        "sri v9.2d, v0.2d, #61\n\t"
+        "eor x12, x12, x5, ror 41\n\t"
+        "eor v9.16b, v9.16b, v8.16b\n\t"
+        "eor x14, x14, x9, ror 39\n\t"
+        "ushr v8.2d, v0.2d, #6\n\t"
+        "add x8, x8, x12\n\t"
+        "eor v9.16b, v9.16b, v8.16b\n\t"
+        "eor x16, x9, x10\n\t"
+        "add v1.2d, v1.2d, v9.2d\n\t"
+        "eor x12, x6, x7\n\t"
+        "ext v9.16b, v5.16b, v6.16b, #8\n\t"
+        "and x17, x16, x17\n\t"
+        "add v1.2d, v1.2d, v9.2d\n\t"
+        "and x12, x12, x5\n\t"
+        "shl v8.2d, v10.2d, #63\n\t"
+        "add x8, x8, x13\n\t"
+        "sri v8.2d, v10.2d, #1\n\t"
+        "eor x12, x12, x7\n\t"
+        "tbl v9.16b, {v10.16b}, v11.16b\n\t"
+        "add x8, x8, x15\n\t"
+        "eor v9.16b, v9.16b, v8.16b\n\t"
+        "eor x17, x17, x10\n\t"
+        "ushr v10.2d, v10.2d, #7\n\t"
+        "add x8, x8, x12\n\t"
+        "eor v9.16b, v9.16b, v10.16b\n\t"
+        "add x14, x14, x17\n\t"
+        "add v1.2d, v1.2d, v9.2d\n\t"
+        "add x4, x4, x8\n\t"
+        "add x8, x8, x14\n\t"
+        /* Round 4 */
+        "mov x13, v2.d[0]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ror x12, x4, #14\n\t"
+        "ror x14, x8, #28\n\t"
+        "eor x12, x12, x4, ror 18\n\t"
+        "eor x14, x14, x8, ror 34\n\t"
+        "eor x12, x12, x4, ror 41\n\t"
+        "eor x14, x14, x8, ror 39\n\t"
+        "add x7, x7, x12\n\t"
+        "eor x17, x8, x9\n\t"
+        "eor x12, x5, x6\n\t"
+        "and x16, x17, x16\n\t"
+        "and x12, x12, x4\n\t"
+        "add x7, x7, x13\n\t"
+        "eor x12, x12, x6\n\t"
+        "add x7, x7, x15\n\t"
+        "eor x16, x16, x9\n\t"
+        "add x7, x7, x12\n\t"
+        "add x14, x14, x16\n\t"
+        "add x11, x11, x7\n\t"
+        "add x7, x7, x14\n\t"
+        /* Round 5 */
+        "mov x13, v2.d[1]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ext v10.16b, v2.16b, v3.16b, #8\n\t"
+        "ror x12, x11, #14\n\t"
+        "shl v8.2d, v1.2d, #45\n\t"
+        "ror x14, x7, #28\n\t"
+        "sri v8.2d, v1.2d, #19\n\t"
+        "eor x12, x12, x11, ror 18\n\t"
+        "shl v9.2d, v1.2d, #3\n\t"
+        "eor x14, x14, x7, ror 34\n\t"
+        "sri v9.2d, v1.2d, #61\n\t"
+        "eor x12, x12, x11, ror 41\n\t"
+        "eor v9.16b, v9.16b, v8.16b\n\t"
+        "eor x14, x14, x7, ror 39\n\t"
+        "ushr v8.2d, v1.2d, #6\n\t"
+        "add x6, x6, x12\n\t"
+        "eor v9.16b, v9.16b, v8.16b\n\t"
+        "eor x16, x7, x8\n\t"
+        "add v2.2d, v2.2d, v9.2d\n\t"
+        "eor x12, x4, x5\n\t"
+        "ext v9.16b, v6.16b, v7.16b, #8\n\t"
+        "and x17, x16, x17\n\t"
+        "add v2.2d, v2.2d, v9.2d\n\t"
+        "and x12, x12, x11\n\t"
+        "shl v8.2d, v10.2d, #63\n\t"
+        "add x6, x6, x13\n\t"
+        "sri v8.2d, v10.2d, #1\n\t"
+        "eor x12, x12, x5\n\t"
+        "tbl v9.16b, {v10.16b}, v11.16b\n\t"
+        "add x6, x6, x15\n\t"
+        "eor v9.16b, v9.16b, v8.16b\n\t"
+        "eor x17, x17, x8\n\t"
+        "ushr v10.2d, v10.2d, #7\n\t"
+        "add x6, x6, x12\n\t"
+        "eor v9.16b, v9.16b, v10.16b\n\t"
+        "add x14, x14, x17\n\t"
+        "add v2.2d, v2.2d, v9.2d\n\t"
+        "add x10, x10, x6\n\t"
+        "add x6, x6, x14\n\t"
+        /* Round 6 */
+        "mov x13, v3.d[0]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ror x12, x10, #14\n\t"
+        "ror x14, x6, #28\n\t"
+        "eor x12, x12, x10, ror 18\n\t"
+        "eor x14, x14, x6, ror 34\n\t"
+        "eor x12, x12, x10, ror 41\n\t"
+        "eor x14, x14, x6, ror 39\n\t"
+        "add x5, x5, x12\n\t"
+        "eor x17, x6, x7\n\t"
+        "eor x12, x11, x4\n\t"
+        "and x16, x17, x16\n\t"
+        "and x12, x12, x10\n\t"
+        "add x5, x5, x13\n\t"
+        "eor x12, x12, x4\n\t"
+        "add x5, x5, x15\n\t"
+        "eor x16, x16, x7\n\t"
+        "add x5, x5, x12\n\t"
+        "add x14, x14, x16\n\t"
+        "add x9, x9, x5\n\t"
+        "add x5, x5, x14\n\t"
+        /* Round 7 */
+        "mov x13, v3.d[1]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ext v10.16b, v3.16b, v4.16b, #8\n\t"
+        "ror x12, x9, #14\n\t"
+        "shl v8.2d, v2.2d, #45\n\t"
+        "ror x14, x5, #28\n\t"
+        "sri v8.2d, v2.2d, #19\n\t"
+        "eor x12, x12, x9, ror 18\n\t"
+        "shl v9.2d, v2.2d, #3\n\t"
+        "eor x14, x14, x5, ror 34\n\t"
+        "sri v9.2d, v2.2d, #61\n\t"
+        "eor x12, x12, x9, ror 41\n\t"
+        "eor v9.16b, v9.16b, v8.16b\n\t"
+        "eor x14, x14, x5, ror 39\n\t"
+        "ushr v8.2d, v2.2d, #6\n\t"
+        "add x4, x4, x12\n\t"
+        "eor v9.16b, v9.16b, v8.16b\n\t"
+        "eor x16, x5, x6\n\t"
+        "add v3.2d, v3.2d, v9.2d\n\t"
+        "eor x12, x10, x11\n\t"
+        "ext v9.16b, v7.16b, v0.16b, #8\n\t"
+        "and x17, x16, x17\n\t"
+        "add v3.2d, v3.2d, v9.2d\n\t"
+        "and x12, x12, x9\n\t"
+        "shl v8.2d, v10.2d, #63\n\t"
+        "add x4, x4, x13\n\t"
+        "sri v8.2d, v10.2d, #1\n\t"
+        "eor x12, x12, x11\n\t"
+        "tbl v9.16b, {v10.16b}, v11.16b\n\t"
+        "add x4, x4, x15\n\t"
+        "eor v9.16b, v9.16b, v8.16b\n\t"
+        "eor x17, x17, x6\n\t"
+        "ushr v10.2d, v10.2d, #7\n\t"
+        "add x4, x4, x12\n\t"
+        "eor v9.16b, v9.16b, v10.16b\n\t"
+        "add x14, x14, x17\n\t"
+        "add v3.2d, v3.2d, v9.2d\n\t"
+        "add x8, x8, x4\n\t"
+        "add x4, x4, x14\n\t"
+        /* Round 8 */
+        "mov x13, v4.d[0]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ror x12, x8, #14\n\t"
+        "ror x14, x4, #28\n\t"
+        "eor x12, x12, x8, ror 18\n\t"
+        "eor x14, x14, x4, ror 34\n\t"
+        "eor x12, x12, x8, ror 41\n\t"
+        "eor x14, x14, x4, ror 39\n\t"
+        "add x11, x11, x12\n\t"
+        "eor x17, x4, x5\n\t"
+        "eor x12, x9, x10\n\t"
+        "and x16, x17, x16\n\t"
+        "and x12, x12, x8\n\t"
+        "add x11, x11, x13\n\t"
+        "eor x12, x12, x10\n\t"
+        "add x11, x11, x15\n\t"
+        "eor x16, x16, x5\n\t"
+        "add x11, x11, x12\n\t"
+        "add x14, x14, x16\n\t"
+        "add x7, x7, x11\n\t"
+        "add x11, x11, x14\n\t"
+        /* Round 9 */
+        "mov x13, v4.d[1]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ext v10.16b, v4.16b, v5.16b, #8\n\t"
+        "ror x12, x7, #14\n\t"
+        "shl v8.2d, v3.2d, #45\n\t"
+        "ror x14, x11, #28\n\t"
+        "sri v8.2d, v3.2d, #19\n\t"
+        "eor x12, x12, x7, ror 18\n\t"
+        "shl v9.2d, v3.2d, #3\n\t"
+        "eor x14, x14, x11, ror 34\n\t"
+        "sri v9.2d, v3.2d, #61\n\t"
+        "eor x12, x12, x7, ror 41\n\t"
+        "eor v9.16b, v9.16b, v8.16b\n\t"
+        "eor x14, x14, x11, ror 39\n\t"
+        "ushr v8.2d, v3.2d, #6\n\t"
+        "add x10, x10, x12\n\t"
+        "eor v9.16b, v9.16b, v8.16b\n\t"
+        "eor x16, x11, x4\n\t"
+        "add v4.2d, v4.2d, v9.2d\n\t"
+        "eor x12, x8, x9\n\t"
+        "ext v9.16b, v0.16b, v1.16b, #8\n\t"
+        "and x17, x16, x17\n\t"
+        "add v4.2d, v4.2d, v9.2d\n\t"
+        "and x12, x12, x7\n\t"
+        "shl v8.2d, v10.2d, #63\n\t"
+        "add x10, x10, x13\n\t"
+        "sri v8.2d, v10.2d, #1\n\t"
+        "eor x12, x12, x9\n\t"
+        "tbl v9.16b, {v10.16b}, v11.16b\n\t"
+        "add x10, x10, x15\n\t"
+        "eor v9.16b, v9.16b, v8.16b\n\t"
+        "eor x17, x17, x4\n\t"
+        "ushr v10.2d, v10.2d, #7\n\t"
+        "add x10, x10, x12\n\t"
+        "eor v9.16b, v9.16b, v10.16b\n\t"
+        "add x14, x14, x17\n\t"
+        "add v4.2d, v4.2d, v9.2d\n\t"
+        "add x6, x6, x10\n\t"
+        "add x10, x10, x14\n\t"
+        /* Round 10 */
+        "mov x13, v5.d[0]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ror x12, x6, #14\n\t"
+        "ror x14, x10, #28\n\t"
+        "eor x12, x12, x6, ror 18\n\t"
+        "eor x14, x14, x10, ror 34\n\t"
+        "eor x12, x12, x6, ror 41\n\t"
+        "eor x14, x14, x10, ror 39\n\t"
+        "add x9, x9, x12\n\t"
+        "eor x17, x10, x11\n\t"
+        "eor x12, x7, x8\n\t"
+        "and x16, x17, x16\n\t"
+        "and x12, x12, x6\n\t"
+        "add x9, x9, x13\n\t"
+        "eor x12, x12, x8\n\t"
+        "add x9, x9, x15\n\t"
+        "eor x16, x16, x11\n\t"
+        "add x9, x9, x12\n\t"
+        "add x14, x14, x16\n\t"
+        "add x5, x5, x9\n\t"
+        "add x9, x9, x14\n\t"
+        /* Round 11 */
+        "mov x13, v5.d[1]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ext v10.16b, v5.16b, v6.16b, #8\n\t"
+        "ror x12, x5, #14\n\t"
+        "shl v8.2d, v4.2d, #45\n\t"
+        "ror x14, x9, #28\n\t"
+        "sri v8.2d, v4.2d, #19\n\t"
+        "eor x12, x12, x5, ror 18\n\t"
+        "shl v9.2d, v4.2d, #3\n\t"
+        "eor x14, x14, x9, ror 34\n\t"
+        "sri v9.2d, v4.2d, #61\n\t"
+        "eor x12, x12, x5, ror 41\n\t"
+        "eor v9.16b, v9.16b, v8.16b\n\t"
+        "eor x14, x14, x9, ror 39\n\t"
+        "ushr v8.2d, v4.2d, #6\n\t"
+        "add x8, x8, x12\n\t"
+        "eor v9.16b, v9.16b, v8.16b\n\t"
+        "eor x16, x9, x10\n\t"
+        "add v5.2d, v5.2d, v9.2d\n\t"
+        "eor x12, x6, x7\n\t"
+        "ext v9.16b, v1.16b, v2.16b, #8\n\t"
+        "and x17, x16, x17\n\t"
+        "add v5.2d, v5.2d, v9.2d\n\t"
+        "and x12, x12, x5\n\t"
+        "shl v8.2d, v10.2d, #63\n\t"
+        "add x8, x8, x13\n\t"
+        "sri v8.2d, v10.2d, #1\n\t"
+        "eor x12, x12, x7\n\t"
+        "tbl v9.16b, {v10.16b}, v11.16b\n\t"
+        "add x8, x8, x15\n\t"
+        "eor v9.16b, v9.16b, v8.16b\n\t"
+        "eor x17, x17, x10\n\t"
+        "ushr v10.2d, v10.2d, #7\n\t"
+        "add x8, x8, x12\n\t"
+        "eor v9.16b, v9.16b, v10.16b\n\t"
+        "add x14, x14, x17\n\t"
+        "add v5.2d, v5.2d, v9.2d\n\t"
+        "add x4, x4, x8\n\t"
+        "add x8, x8, x14\n\t"
+        /* Round 12 */
+        "mov x13, v6.d[0]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ror x12, x4, #14\n\t"
+        "ror x14, x8, #28\n\t"
+        "eor x12, x12, x4, ror 18\n\t"
+        "eor x14, x14, x8, ror 34\n\t"
+        "eor x12, x12, x4, ror 41\n\t"
+        "eor x14, x14, x8, ror 39\n\t"
+        "add x7, x7, x12\n\t"
+        "eor x17, x8, x9\n\t"
+        "eor x12, x5, x6\n\t"
+        "and x16, x17, x16\n\t"
+        "and x12, x12, x4\n\t"
+        "add x7, x7, x13\n\t"
+        "eor x12, x12, x6\n\t"
+        "add x7, x7, x15\n\t"
+        "eor x16, x16, x9\n\t"
+        "add x7, x7, x12\n\t"
+        "add x14, x14, x16\n\t"
+        "add x11, x11, x7\n\t"
+        "add x7, x7, x14\n\t"
+        /* Round 13 */
+        "mov x13, v6.d[1]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ext v10.16b, v6.16b, v7.16b, #8\n\t"
+        "ror x12, x11, #14\n\t"
+        "shl v8.2d, v5.2d, #45\n\t"
+        "ror x14, x7, #28\n\t"
+        "sri v8.2d, v5.2d, #19\n\t"
+        "eor x12, x12, x11, ror 18\n\t"
+        "shl v9.2d, v5.2d, #3\n\t"
+        "eor x14, x14, x7, ror 34\n\t"
+        "sri v9.2d, v5.2d, #61\n\t"
+        "eor x12, x12, x11, ror 41\n\t"
+        "eor v9.16b, v9.16b, v8.16b\n\t"
+        "eor x14, x14, x7, ror 39\n\t"
+        "ushr v8.2d, v5.2d, #6\n\t"
+        "add x6, x6, x12\n\t"
+        "eor v9.16b, v9.16b, v8.16b\n\t"
+        "eor x16, x7, x8\n\t"
+        "add v6.2d, v6.2d, v9.2d\n\t"
+        "eor x12, x4, x5\n\t"
+        "ext v9.16b, v2.16b, v3.16b, #8\n\t"
+        "and x17, x16, x17\n\t"
+        "add v6.2d, v6.2d, v9.2d\n\t"
+        "and x12, x12, x11\n\t"
+        "shl v8.2d, v10.2d, #63\n\t"
+        "add x6, x6, x13\n\t"
+        "sri v8.2d, v10.2d, #1\n\t"
+        "eor x12, x12, x5\n\t"
+        "tbl v9.16b, {v10.16b}, v11.16b\n\t"
+        "add x6, x6, x15\n\t"
+        "eor v9.16b, v9.16b, v8.16b\n\t"
+        "eor x17, x17, x8\n\t"
+        "ushr v10.2d, v10.2d, #7\n\t"
+        "add x6, x6, x12\n\t"
+        "eor v9.16b, v9.16b, v10.16b\n\t"
+        "add x14, x14, x17\n\t"
+        "add v6.2d, v6.2d, v9.2d\n\t"
+        "add x10, x10, x6\n\t"
+        "add x6, x6, x14\n\t"
+        /* Round 14 */
+        "mov x13, v7.d[0]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ror x12, x10, #14\n\t"
+        "ror x14, x6, #28\n\t"
+        "eor x12, x12, x10, ror 18\n\t"
+        "eor x14, x14, x6, ror 34\n\t"
+        "eor x12, x12, x10, ror 41\n\t"
+        "eor x14, x14, x6, ror 39\n\t"
+        "add x5, x5, x12\n\t"
+        "eor x17, x6, x7\n\t"
+        "eor x12, x11, x4\n\t"
+        "and x16, x17, x16\n\t"
+        "and x12, x12, x10\n\t"
+        "add x5, x5, x13\n\t"
+        "eor x12, x12, x4\n\t"
+        "add x5, x5, x15\n\t"
+        "eor x16, x16, x7\n\t"
+        "add x5, x5, x12\n\t"
+        "add x14, x14, x16\n\t"
+        "add x9, x9, x5\n\t"
+        "add x5, x5, x14\n\t"
+        /* Round 15 */
+        "mov x13, v7.d[1]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ext v10.16b, v7.16b, v0.16b, #8\n\t"
+        "ror x12, x9, #14\n\t"
+        "shl v8.2d, v6.2d, #45\n\t"
+        "ror x14, x5, #28\n\t"
+        "sri v8.2d, v6.2d, #19\n\t"
+        "eor x12, x12, x9, ror 18\n\t"
+        "shl v9.2d, v6.2d, #3\n\t"
+        "eor x14, x14, x5, ror 34\n\t"
+        "sri v9.2d, v6.2d, #61\n\t"
+        "eor x12, x12, x9, ror 41\n\t"
+        "eor v9.16b, v9.16b, v8.16b\n\t"
+        "eor x14, x14, x5, ror 39\n\t"
+        "ushr v8.2d, v6.2d, #6\n\t"
+        "add x4, x4, x12\n\t"
+        "eor v9.16b, v9.16b, v8.16b\n\t"
+        "eor x16, x5, x6\n\t"
+        "add v7.2d, v7.2d, v9.2d\n\t"
+        "eor x12, x10, x11\n\t"
+        "ext v9.16b, v3.16b, v4.16b, #8\n\t"
+        "and x17, x16, x17\n\t"
+        "add v7.2d, v7.2d, v9.2d\n\t"
+        "and x12, x12, x9\n\t"
+        "shl v8.2d, v10.2d, #63\n\t"
+        "add x4, x4, x13\n\t"
+        "sri v8.2d, v10.2d, #1\n\t"
+        "eor x12, x12, x11\n\t"
+        "tbl v9.16b, {v10.16b}, v11.16b\n\t"
+        "add x4, x4, x15\n\t"
+        "eor v9.16b, v9.16b, v8.16b\n\t"
+        "eor x17, x17, x6\n\t"
+        "ushr v10.2d, v10.2d, #7\n\t"
+        "add x4, x4, x12\n\t"
+        "eor v9.16b, v9.16b, v10.16b\n\t"
+        "add x14, x14, x17\n\t"
+        "add v7.2d, v7.2d, v9.2d\n\t"
+        "add x8, x8, x4\n\t"
+        "add x4, x4, x14\n\t"
+        "subs x27, x27, #1\n\t"
+        "bne L_sha512_len_neon_start_%=\n\t"
+        /* Final 16 rounds: W (v0..v7) is fully scheduled, so these rounds
+         * consume it without any further schedule update. */
+        /* Round 0 */
+        "mov x13, v0.d[0]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ror x12, x8, #14\n\t"
+        "ror x14, x4, #28\n\t"
+        "eor x12, x12, x8, ror 18\n\t"
+        "eor x14, x14, x4, ror 34\n\t"
+        "eor x12, x12, x8, ror 41\n\t"
+        "eor x14, x14, x4, ror 39\n\t"
+        "add x11, x11, x12\n\t"
+        "eor x17, x4, x5\n\t"
+        "eor x12, x9, x10\n\t"
+        "and x16, x17, x16\n\t"
+        "and x12, x12, x8\n\t"
+        "add x11, x11, x13\n\t"
+        "eor x12, x12, x10\n\t"
+        "add x11, x11, x15\n\t"
+        "eor x16, x16, x5\n\t"
+        "add x11, x11, x12\n\t"
+        "add x14, x14, x16\n\t"
+        "add x7, x7, x11\n\t"
+        "add x11, x11, x14\n\t"
+        /* Round 1 */
+        "mov x13, v0.d[1]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ror x12, x7, #14\n\t"
+        "ror x14, x11, #28\n\t"
+        "eor x12, x12, x7, ror 18\n\t"
+        "eor x14, x14, x11, ror 34\n\t"
+        "eor x12, x12, x7, ror 41\n\t"
+        "eor x14, x14, x11, ror 39\n\t"
+        "add x10, x10, x12\n\t"
+        "eor x16, x11, x4\n\t"
+        "eor x12, x8, x9\n\t"
+        "and x17, x16, x17\n\t"
+        "and x12, x12, x7\n\t"
+        "add x10, x10, x13\n\t"
+        "eor x12, x12, x9\n\t"
+        "add x10, x10, x15\n\t"
+        "eor x17, x17, x4\n\t"
+        "add x10, x10, x12\n\t"
+        "add x14, x14, x17\n\t"
+        "add x6, x6, x10\n\t"
+        "add x10, x10, x14\n\t"
+        /* Round 2 */
+        "mov x13, v1.d[0]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ror x12, x6, #14\n\t"
+        "ror x14, x10, #28\n\t"
+        "eor x12, x12, x6, ror 18\n\t"
+        "eor x14, x14, x10, ror 34\n\t"
+        "eor x12, x12, x6, ror 41\n\t"
+        "eor x14, x14, x10, ror 39\n\t"
+        "add x9, x9, x12\n\t"
+        "eor x17, x10, x11\n\t"
+        "eor x12, x7, x8\n\t"
+        "and x16, x17, x16\n\t"
+        "and x12, x12, x6\n\t"
+        "add x9, x9, x13\n\t"
+        "eor x12, x12, x8\n\t"
+        "add x9, x9, x15\n\t"
+        "eor x16, x16, x11\n\t"
+        "add x9, x9, x12\n\t"
+        "add x14, x14, x16\n\t"
+        "add x5, x5, x9\n\t"
+        "add x9, x9, x14\n\t"
+        /* Round 3 */
+        "mov x13, v1.d[1]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ror x12, x5, #14\n\t"
+        "ror x14, x9, #28\n\t"
+        "eor x12, x12, x5, ror 18\n\t"
+        "eor x14, x14, x9, ror 34\n\t"
+        "eor x12, x12, x5, ror 41\n\t"
+        "eor x14, x14, x9, ror 39\n\t"
+        "add x8, x8, x12\n\t"
+        "eor x16, x9, x10\n\t"
+        "eor x12, x6, x7\n\t"
+        "and x17, x16, x17\n\t"
+        "and x12, x12, x5\n\t"
+        "add x8, x8, x13\n\t"
+        "eor x12, x12, x7\n\t"
+        "add x8, x8, x15\n\t"
+        "eor x17, x17, x10\n\t"
+        "add x8, x8, x12\n\t"
+        "add x14, x14, x17\n\t"
+        "add x4, x4, x8\n\t"
+        "add x8, x8, x14\n\t"
+        /* Round 4 */
+        "mov x13, v2.d[0]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ror x12, x4, #14\n\t"
+        "ror x14, x8, #28\n\t"
+        "eor x12, x12, x4, ror 18\n\t"
+        "eor x14, x14, x8, ror 34\n\t"
+        "eor x12, x12, x4, ror 41\n\t"
+        "eor x14, x14, x8, ror 39\n\t"
+        "add x7, x7, x12\n\t"
+        "eor x17, x8, x9\n\t"
+        "eor x12, x5, x6\n\t"
+        "and x16, x17, x16\n\t"
+        "and x12, x12, x4\n\t"
+        "add x7, x7, x13\n\t"
+        "eor x12, x12, x6\n\t"
+        "add x7, x7, x15\n\t"
+        "eor x16, x16, x9\n\t"
+        "add x7, x7, x12\n\t"
+        "add x14, x14, x16\n\t"
+        "add x11, x11, x7\n\t"
+        "add x7, x7, x14\n\t"
+        /* Round 5 */
+        "mov x13, v2.d[1]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ror x12, x11, #14\n\t"
+        "ror x14, x7, #28\n\t"
+        "eor x12, x12, x11, ror 18\n\t"
+        "eor x14, x14, x7, ror 34\n\t"
+        "eor x12, x12, x11, ror 41\n\t"
+        "eor x14, x14, x7, ror 39\n\t"
+        "add x6, x6, x12\n\t"
+        "eor x16, x7, x8\n\t"
+        "eor x12, x4, x5\n\t"
+        "and x17, x16, x17\n\t"
+        "and x12, x12, x11\n\t"
+        "add x6, x6, x13\n\t"
+        "eor x12, x12, x5\n\t"
+        "add x6, x6, x15\n\t"
+        "eor x17, x17, x8\n\t"
+        "add x6, x6, x12\n\t"
+        "add x14, x14, x17\n\t"
+        "add x10, x10, x6\n\t"
+        "add x6, x6, x14\n\t"
+        /* Round 6 */
+        "mov x13, v3.d[0]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ror x12, x10, #14\n\t"
+        "ror x14, x6, #28\n\t"
+        "eor x12, x12, x10, ror 18\n\t"
+        "eor x14, x14, x6, ror 34\n\t"
+        "eor x12, x12, x10, ror 41\n\t"
+        "eor x14, x14, x6, ror 39\n\t"
+        "add x5, x5, x12\n\t"
+        "eor x17, x6, x7\n\t"
+        "eor x12, x11, x4\n\t"
+        "and x16, x17, x16\n\t"
+        "and x12, x12, x10\n\t"
+        "add x5, x5, x13\n\t"
+        "eor x12, x12, x4\n\t"
+        "add x5, x5, x15\n\t"
+        "eor x16, x16, x7\n\t"
+        "add x5, x5, x12\n\t"
+        "add x14, x14, x16\n\t"
+        "add x9, x9, x5\n\t"
+        "add x5, x5, x14\n\t"
+        /* Round 7 */
+        "mov x13, v3.d[1]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ror x12, x9, #14\n\t"
+        "ror x14, x5, #28\n\t"
+        "eor x12, x12, x9, ror 18\n\t"
+        "eor x14, x14, x5, ror 34\n\t"
+        "eor x12, x12, x9, ror 41\n\t"
+        "eor x14, x14, x5, ror 39\n\t"
+        "add x4, x4, x12\n\t"
+        "eor x16, x5, x6\n\t"
+        "eor x12, x10, x11\n\t"
+        "and x17, x16, x17\n\t"
+        "and x12, x12, x9\n\t"
+        "add x4, x4, x13\n\t"
+        "eor x12, x12, x11\n\t"
+        "add x4, x4, x15\n\t"
+        "eor x17, x17, x6\n\t"
+        "add x4, x4, x12\n\t"
+        "add x14, x14, x17\n\t"
+        "add x8, x8, x4\n\t"
+        "add x4, x4, x14\n\t"
+        /* Round 8 */
+        "mov x13, v4.d[0]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ror x12, x8, #14\n\t"
+        "ror x14, x4, #28\n\t"
+        "eor x12, x12, x8, ror 18\n\t"
+        "eor x14, x14, x4, ror 34\n\t"
+        "eor x12, x12, x8, ror 41\n\t"
+        "eor x14, x14, x4, ror 39\n\t"
+        "add x11, x11, x12\n\t"
+        "eor x17, x4, x5\n\t"
+        "eor x12, x9, x10\n\t"
+        "and x16, x17, x16\n\t"
+        "and x12, x12, x8\n\t"
+        "add x11, x11, x13\n\t"
+        "eor x12, x12, x10\n\t"
+        "add x11, x11, x15\n\t"
+        "eor x16, x16, x5\n\t"
+        "add x11, x11, x12\n\t"
+        "add x14, x14, x16\n\t"
+        "add x7, x7, x11\n\t"
+        "add x11, x11, x14\n\t"
+        /* Round 9 */
+        "mov x13, v4.d[1]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ror x12, x7, #14\n\t"
+        "ror x14, x11, #28\n\t"
+        "eor x12, x12, x7, ror 18\n\t"
+        "eor x14, x14, x11, ror 34\n\t"
+        "eor x12, x12, x7, ror 41\n\t"
+        "eor x14, x14, x11, ror 39\n\t"
+        "add x10, x10, x12\n\t"
+        "eor x16, x11, x4\n\t"
+        "eor x12, x8, x9\n\t"
+        "and x17, x16, x17\n\t"
+        "and x12, x12, x7\n\t"
+        "add x10, x10, x13\n\t"
+        "eor x12, x12, x9\n\t"
+        "add x10, x10, x15\n\t"
+        "eor x17, x17, x4\n\t"
+        "add x10, x10, x12\n\t"
+        "add x14, x14, x17\n\t"
+        "add x6, x6, x10\n\t"
+        "add x10, x10, x14\n\t"
+        /* Round 10 */
+        "mov x13, v5.d[0]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ror x12, x6, #14\n\t"
+        "ror x14, x10, #28\n\t"
+        "eor x12, x12, x6, ror 18\n\t"
+        "eor x14, x14, x10, ror 34\n\t"
+        "eor x12, x12, x6, ror 41\n\t"
+        "eor x14, x14, x10, ror 39\n\t"
+        "add x9, x9, x12\n\t"
+        "eor x17, x10, x11\n\t"
+        "eor x12, x7, x8\n\t"
+        "and x16, x17, x16\n\t"
+        "and x12, x12, x6\n\t"
+        "add x9, x9, x13\n\t"
+        "eor x12, x12, x8\n\t"
+        "add x9, x9, x15\n\t"
+        "eor x16, x16, x11\n\t"
+        "add x9, x9, x12\n\t"
+        "add x14, x14, x16\n\t"
+        "add x5, x5, x9\n\t"
+        "add x9, x9, x14\n\t"
+        /* Round 11 */
+        "mov x13, v5.d[1]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ror x12, x5, #14\n\t"
+        "ror x14, x9, #28\n\t"
+        "eor x12, x12, x5, ror 18\n\t"
+        "eor x14, x14, x9, ror 34\n\t"
+        "eor x12, x12, x5, ror 41\n\t"
+        "eor x14, x14, x9, ror 39\n\t"
+        "add x8, x8, x12\n\t"
+        "eor x16, x9, x10\n\t"
+        "eor x12, x6, x7\n\t"
+        "and x17, x16, x17\n\t"
+        "and x12, x12, x5\n\t"
+        "add x8, x8, x13\n\t"
+        "eor x12, x12, x7\n\t"
+        "add x8, x8, x15\n\t"
+        "eor x17, x17, x10\n\t"
+        "add x8, x8, x12\n\t"
+        "add x14, x14, x17\n\t"
+        "add x4, x4, x8\n\t"
+        "add x8, x8, x14\n\t"
+        /* Round 12 */
+        "mov x13, v6.d[0]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ror x12, x4, #14\n\t"
+        "ror x14, x8, #28\n\t"
+        "eor x12, x12, x4, ror 18\n\t"
+        "eor x14, x14, x8, ror 34\n\t"
+        "eor x12, x12, x4, ror 41\n\t"
+        "eor x14, x14, x8, ror 39\n\t"
+        "add x7, x7, x12\n\t"
+        "eor x17, x8, x9\n\t"
+        "eor x12, x5, x6\n\t"
+        "and x16, x17, x16\n\t"
+        "and x12, x12, x4\n\t"
+        "add x7, x7, x13\n\t"
+        "eor x12, x12, x6\n\t"
+        "add x7, x7, x15\n\t"
+        "eor x16, x16, x9\n\t"
+        "add x7, x7, x12\n\t"
+        "add x14, x14, x16\n\t"
+        "add x11, x11, x7\n\t"
+        "add x7, x7, x14\n\t"
+        /* Round 13 */
+        "mov x13, v6.d[1]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ror x12, x11, #14\n\t"
+        "ror x14, x7, #28\n\t"
+        "eor x12, x12, x11, ror 18\n\t"
+        "eor x14, x14, x7, ror 34\n\t"
+        "eor x12, x12, x11, ror 41\n\t"
+        "eor x14, x14, x7, ror 39\n\t"
+        "add x6, x6, x12\n\t"
+        "eor x16, x7, x8\n\t"
+        "eor x12, x4, x5\n\t"
+        "and x17, x16, x17\n\t"
+        "and x12, x12, x11\n\t"
+        "add x6, x6, x13\n\t"
+        "eor x12, x12, x5\n\t"
+        "add x6, x6, x15\n\t"
+        "eor x17, x17, x8\n\t"
+        "add x6, x6, x12\n\t"
+        "add x14, x14, x17\n\t"
+        "add x10, x10, x6\n\t"
+        "add x6, x6, x14\n\t"
+        /* Round 14 */
+        "mov x13, v7.d[0]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ror x12, x10, #14\n\t"
+        "ror x14, x6, #28\n\t"
+        "eor x12, x12, x10, ror 18\n\t"
+        "eor x14, x14, x6, ror 34\n\t"
+        "eor x12, x12, x10, ror 41\n\t"
+        "eor x14, x14, x6, ror 39\n\t"
+        "add x5, x5, x12\n\t"
+        "eor x17, x6, x7\n\t"
+        "eor x12, x11, x4\n\t"
+        "and x16, x17, x16\n\t"
+        "and x12, x12, x10\n\t"
+        "add x5, x5, x13\n\t"
+        "eor x12, x12, x4\n\t"
+        "add x5, x5, x15\n\t"
+        "eor x16, x16, x7\n\t"
+        "add x5, x5, x12\n\t"
+        "add x14, x14, x16\n\t"
+        "add x9, x9, x5\n\t"
+        "add x5, x5, x14\n\t"
+        /* Round 15 */
+        "mov x13, v7.d[1]\n\t"
+        "ldr x15, [x3], #8\n\t"
+        "ror x12, x9, #14\n\t"
+        "ror x14, x5, #28\n\t"
+        "eor x12, x12, x9, ror 18\n\t"
+        "eor x14, x14, x5, ror 34\n\t"
+        "eor x12, x12, x9, ror 41\n\t"
+        "eor x14, x14, x5, ror 39\n\t"
+        "add x4, x4, x12\n\t"
+        "eor x16, x5, x6\n\t"
+        "eor x12, x10, x11\n\t"
+        "and x17, x16, x17\n\t"
+        "and x12, x12, x9\n\t"
+        "add x4, x4, x13\n\t"
+        "eor x12, x12, x11\n\t"
+        "add x4, x4, x15\n\t"
+        "eor x17, x17, x6\n\t"
+        "add x4, x4, x12\n\t"
+        "add x14, x14, x17\n\t"
+        "add x8, x8, x4\n\t"
+        "add x4, x4, x14\n\t"
+        /* Add the digest saved at block entry (x19..x26) back in. */
+        "add x11, x11, x26\n\t"
+        "add x10, x10, x25\n\t"
+        "add x9, x9, x24\n\t"
+        "add x8, x8, x23\n\t"
+        "add x7, x7, x22\n\t"
+        "add x6, x6, x21\n\t"
+        "add x5, x5, x20\n\t"
+        "add x4, x4, x19\n\t"
+        /* Rewind the K pointer and loop while bytes remain. */
+        "adr x3, %[L_SHA512_transform_neon_len_k]\n\t"
+        "subs %w[len], %w[len], #0x80\n\t"
+        "bne L_sha512_len_neon_begin_%=\n\t"
+        /* Store the updated digest back into the hash state. */
+        "stp x4, x5, [%x[sha512]]\n\t"
+        "stp x6, x7, [%x[sha512], #16]\n\t"
+        "stp x8, x9, [%x[sha512], #32]\n\t"
+        "stp x10, x11, [%x[sha512], #48]\n\t"
+        "ldp x29, x30, [sp], #16\n\t"
+        : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len)
+        : [L_SHA512_transform_neon_len_k] "S" (L_SHA512_transform_neon_len_k), [L_SHA512_transform_neon_len_ror8] "S" (L_SHA512_transform_neon_len_ror8)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11"
+    );
+}
+
+#endif /* WOLFSSL_ARMASM */
+#endif /* __aarch64__ */