/* armv8-sha512-asm * * Copyright (C) 2006-2020 wolfSSL Inc. * * This file is part of wolfSSL. * * wolfSSL is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * wolfSSL is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA */ /* Generated using (from wolfssl): * cd ../scripts * ruby ./sha2/sha512.rb arm64 ../wolfssl/wolfcrypt/src/port/arm/armv8-sha512-asm.c */ #ifdef __aarch64__ #include #ifdef HAVE_CONFIG_H #include #endif #include #ifdef WOLFSSL_ARMASM #include static const uint64_t L_SHA512_transform_neon_len_k[] = { 0x428a2f98d728ae22UL, 0x7137449123ef65cdUL, 0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL, 0x3956c25bf348b538UL, 0x59f111f1b605d019UL, 0x923f82a4af194f9bUL, 0xab1c5ed5da6d8118UL, 0xd807aa98a3030242UL, 0x12835b0145706fbeUL, 0x243185be4ee4b28cUL, 0x550c7dc3d5ffb4e2UL, 0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL, 0x9bdc06a725c71235UL, 0xc19bf174cf692694UL, 0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL, 0xfc19dc68b8cd5b5UL, 0x240ca1cc77ac9c65UL, 0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL, 0x5cb0a9dcbd41fbd4UL, 0x76f988da831153b5UL, 0x983e5152ee66dfabUL, 0xa831c66d2db43210UL, 0xb00327c898fb213fUL, 0xbf597fc7beef0ee4UL, 0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL, 0x6ca6351e003826fUL, 0x142929670a0e6e70UL, 0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL, 0x4d2c6dfc5ac42aedUL, 0x53380d139d95b3dfUL, 0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL, 0x81c2c92e47edaee6UL, 0x92722c851482353bUL, 0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL, 0xc24b8b70d0f89791UL, 0xc76c51a30654be30UL, 0xd192e819d6ef5218UL, 0xd69906245565a910UL, 0xf40e35855771202aUL, 0x106aa07032bbd1b8UL, 0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL, 0x2748774cdf8eeb99UL, 0x34b0bcb5e19b48a8UL, 0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL, 0x5b9cca4f7763e373UL, 0x682e6ff3d6b2b8a3UL, 0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL, 0x84c87814a1f0ab72UL, 0x8cc702081a6439ecUL, 0x90befffa23631e28UL, 0xa4506cebde82bde9UL, 0xbef9a3f7b2c67915UL, 0xc67178f2e372532bUL, 0xca273eceea26619cUL, 0xd186b8c721c0c207UL, 0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL, 0x6f067aa72176fbaUL, 0xa637dc5a2c898a6UL, 0x113f9804bef90daeUL, 0x1b710b35131c471bUL, 0x28db77f523047d84UL, 0x32caab7b40c72493UL, 0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL, 0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL, 0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL, }; static const uint64_t L_SHA512_transform_neon_len_ror8[] = { 0x7060504030201UL, 0x80f0e0d0c0b0a09UL, }; void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len) { __asm__ __volatile__ ( "stp x29, x30, [sp, #-16]!\n\t" "add x29, sp, #0\n\t" "adr x3, %[L_SHA512_transform_neon_len_k]\n\t" "adr x27, %[L_SHA512_transform_neon_len_ror8]\n\t" "ld1 {v11.16b}, [x27]\n\t" /* Load digest into working vars */ "ldp x4, x5, [%x[sha512]]\n\t" "ldp x6, x7, [%x[sha512], #16]\n\t" "ldp x8, x9, [%x[sha512], #32]\n\t" "ldp x10, x11, [%x[sha512], #48]\n\t" /* Start of loop processing a block */ "\n" "L_sha512_len_neon_begin_%=: \n\t" /* Load W */ /* Copy digest to add in at end */ "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[data]], #0x40\n\t" "mov x19, x4\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[data]], #0x40\n\t" "mov x20, x5\n\t" "rev64 v0.16b, v0.16b\n\t" "mov x21, x6\n\t" "rev64 v1.16b, v1.16b\n\t" "mov x22, x7\n\t" "rev64 v2.16b, v2.16b\n\t" "mov x23, x8\n\t" "rev64 v3.16b, v3.16b\n\t" "mov x24, x9\n\t" "rev64 v4.16b, v4.16b\n\t" "mov x25, x10\n\t" "rev64 v5.16b, v5.16b\n\t" "mov x26, x11\n\t" "rev64 v6.16b, v6.16b\n\t" "rev64 v7.16b, v7.16b\n\t" /* Pre-calc: b ^ c */ "eor x16, x5, x6\n\t" "mov x27, #4\n\t" /* Start of 16 rounds */ "\n" "L_sha512_len_neon_start_%=: \n\t" /* Round 0 */ "mov x13, v0.d[0]\n\t" "ldr x15, [x3], #8\n\t" "ror x12, x8, #14\n\t" "ror x14, x4, #28\n\t" "eor x12, x12, x8, ror 18\n\t" "eor x14, x14, x4, ror 34\n\t" "eor x12, x12, x8, ror 41\n\t" "eor x14, x14, x4, ror 39\n\t" "add x11, x11, x12\n\t" "eor x17, x4, x5\n\t" "eor x12, x9, x10\n\t" "and x16, x17, x16\n\t" "and x12, x12, x8\n\t" "add x11, x11, x13\n\t" "eor x12, x12, x10\n\t" "add x11, x11, x15\n\t" "eor x16, x16, x5\n\t" "add x11, x11, x12\n\t" "add x14, x14, x16\n\t" "add x7, x7, x11\n\t" "add x11, x11, x14\n\t" /* Round 1 */ "mov x13, v0.d[1]\n\t" "ldr x15, [x3], #8\n\t" "ext v10.16b, v0.16b, v1.16b, #8\n\t" "ror x12, x7, #14\n\t" "shl v8.2d, v7.2d, #45\n\t" "ror x14, x11, #28\n\t" "sri v8.2d, v7.2d, #19\n\t" "eor x12, x12, x7, ror 18\n\t" "shl v9.2d, v7.2d, #3\n\t" "eor x14, x14, x11, ror 34\n\t" "sri v9.2d, v7.2d, #61\n\t" "eor x12, x12, x7, ror 41\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x14, x14, x11, ror 39\n\t" "ushr v8.2d, v7.2d, #6\n\t" "add x10, x10, x12\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x16, x11, x4\n\t" "add v0.2d, v0.2d, v9.2d\n\t" "eor x12, x8, x9\n\t" "ext v9.16b, v4.16b, v5.16b, #8\n\t" "and x17, x16, x17\n\t" "add v0.2d, v0.2d, v9.2d\n\t" "and x12, x12, x7\n\t" "shl v8.2d, v10.2d, #63\n\t" "add x10, x10, x13\n\t" "sri v8.2d, v10.2d, #1\n\t" "eor x12, x12, x9\n\t" "tbl v9.16b, {v10.16b}, v11.16b\n\t" "add x10, x10, x15\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x17, x17, x4\n\t" "ushr v10.2d, v10.2d, #7\n\t" "add x10, x10, x12\n\t" "eor v9.16b, v9.16b, v10.16b\n\t" "add x14, x14, x17\n\t" "add v0.2d, v0.2d, v9.2d\n\t" "add x6, x6, x10\n\t" "add x10, x10, x14\n\t" /* Round 2 */ "mov x13, v1.d[0]\n\t" "ldr x15, [x3], #8\n\t" "ror x12, x6, #14\n\t" "ror x14, x10, #28\n\t" "eor x12, x12, x6, ror 18\n\t" "eor x14, x14, x10, ror 34\n\t" "eor x12, x12, x6, ror 41\n\t" "eor x14, x14, x10, ror 39\n\t" "add x9, x9, x12\n\t" "eor x17, x10, x11\n\t" "eor x12, x7, x8\n\t" "and x16, x17, x16\n\t" "and x12, x12, x6\n\t" "add x9, x9, x13\n\t" "eor x12, x12, x8\n\t" "add x9, x9, x15\n\t" "eor x16, x16, x11\n\t" "add x9, x9, x12\n\t" "add x14, x14, x16\n\t" "add x5, x5, x9\n\t" "add x9, x9, x14\n\t" /* Round 3 */ "mov x13, v1.d[1]\n\t" "ldr x15, [x3], #8\n\t" "ext v10.16b, v1.16b, v2.16b, #8\n\t" "ror x12, x5, #14\n\t" "shl v8.2d, v0.2d, #45\n\t" "ror x14, x9, #28\n\t" "sri v8.2d, v0.2d, #19\n\t" "eor x12, x12, x5, ror 18\n\t" "shl v9.2d, v0.2d, #3\n\t" "eor x14, x14, x9, ror 34\n\t" "sri v9.2d, v0.2d, #61\n\t" "eor x12, x12, x5, ror 41\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x14, x14, x9, ror 39\n\t" "ushr v8.2d, v0.2d, #6\n\t" "add x8, x8, x12\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x16, x9, x10\n\t" "add v1.2d, v1.2d, v9.2d\n\t" "eor x12, x6, x7\n\t" "ext v9.16b, v5.16b, v6.16b, #8\n\t" "and x17, x16, x17\n\t" "add v1.2d, v1.2d, v9.2d\n\t" "and x12, x12, x5\n\t" "shl v8.2d, v10.2d, #63\n\t" "add x8, x8, x13\n\t" "sri v8.2d, v10.2d, #1\n\t" "eor x12, x12, x7\n\t" "tbl v9.16b, {v10.16b}, v11.16b\n\t" "add x8, x8, x15\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x17, x17, x10\n\t" "ushr v10.2d, v10.2d, #7\n\t" "add x8, x8, x12\n\t" "eor v9.16b, v9.16b, v10.16b\n\t" "add x14, x14, x17\n\t" "add v1.2d, v1.2d, v9.2d\n\t" "add x4, x4, x8\n\t" "add x8, x8, x14\n\t" /* Round 4 */ "mov x13, v2.d[0]\n\t" "ldr x15, [x3], #8\n\t" "ror x12, x4, #14\n\t" "ror x14, x8, #28\n\t" "eor x12, x12, x4, ror 18\n\t" "eor x14, x14, x8, ror 34\n\t" "eor x12, x12, x4, ror 41\n\t" "eor x14, x14, x8, ror 39\n\t" "add x7, x7, x12\n\t" "eor x17, x8, x9\n\t" "eor x12, x5, x6\n\t" "and x16, x17, x16\n\t" "and x12, x12, x4\n\t" "add x7, x7, x13\n\t" "eor x12, x12, x6\n\t" "add x7, x7, x15\n\t" "eor x16, x16, x9\n\t" "add x7, x7, x12\n\t" "add x14, x14, x16\n\t" "add x11, x11, x7\n\t" "add x7, x7, x14\n\t" /* Round 5 */ "mov x13, v2.d[1]\n\t" "ldr x15, [x3], #8\n\t" "ext v10.16b, v2.16b, v3.16b, #8\n\t" "ror x12, x11, #14\n\t" "shl v8.2d, v1.2d, #45\n\t" "ror x14, x7, #28\n\t" "sri v8.2d, v1.2d, #19\n\t" "eor x12, x12, x11, ror 18\n\t" "shl v9.2d, v1.2d, #3\n\t" "eor x14, x14, x7, ror 34\n\t" "sri v9.2d, v1.2d, #61\n\t" "eor x12, x12, x11, ror 41\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x14, x14, x7, ror 39\n\t" "ushr v8.2d, v1.2d, #6\n\t" "add x6, x6, x12\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x16, x7, x8\n\t" "add v2.2d, v2.2d, v9.2d\n\t" "eor x12, x4, x5\n\t" "ext v9.16b, v6.16b, v7.16b, #8\n\t" "and x17, x16, x17\n\t" "add v2.2d, v2.2d, v9.2d\n\t" "and x12, x12, x11\n\t" "shl v8.2d, v10.2d, #63\n\t" "add x6, x6, x13\n\t" "sri v8.2d, v10.2d, #1\n\t" "eor x12, x12, x5\n\t" "tbl v9.16b, {v10.16b}, v11.16b\n\t" "add x6, x6, x15\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x17, x17, x8\n\t" "ushr v10.2d, v10.2d, #7\n\t" "add x6, x6, x12\n\t" "eor v9.16b, v9.16b, v10.16b\n\t" "add x14, x14, x17\n\t" "add v2.2d, v2.2d, v9.2d\n\t" "add x10, x10, x6\n\t" "add x6, x6, x14\n\t" /* Round 6 */ "mov x13, v3.d[0]\n\t" "ldr x15, [x3], #8\n\t" "ror x12, x10, #14\n\t" "ror x14, x6, #28\n\t" "eor x12, x12, x10, ror 18\n\t" "eor x14, x14, x6, ror 34\n\t" "eor x12, x12, x10, ror 41\n\t" "eor x14, x14, x6, ror 39\n\t" "add x5, x5, x12\n\t" "eor x17, x6, x7\n\t" "eor x12, x11, x4\n\t" "and x16, x17, x16\n\t" "and x12, x12, x10\n\t" "add x5, x5, x13\n\t" "eor x12, x12, x4\n\t" "add x5, x5, x15\n\t" "eor x16, x16, x7\n\t" "add x5, x5, x12\n\t" "add x14, x14, x16\n\t" "add x9, x9, x5\n\t" "add x5, x5, x14\n\t" /* Round 7 */ "mov x13, v3.d[1]\n\t" "ldr x15, [x3], #8\n\t" "ext v10.16b, v3.16b, v4.16b, #8\n\t" "ror x12, x9, #14\n\t" "shl v8.2d, v2.2d, #45\n\t" "ror x14, x5, #28\n\t" "sri v8.2d, v2.2d, #19\n\t" "eor x12, x12, x9, ror 18\n\t" "shl v9.2d, v2.2d, #3\n\t" "eor x14, x14, x5, ror 34\n\t" "sri v9.2d, v2.2d, #61\n\t" "eor x12, x12, x9, ror 41\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x14, x14, x5, ror 39\n\t" "ushr v8.2d, v2.2d, #6\n\t" "add x4, x4, x12\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x16, x5, x6\n\t" "add v3.2d, v3.2d, v9.2d\n\t" "eor x12, x10, x11\n\t" "ext v9.16b, v7.16b, v0.16b, #8\n\t" "and x17, x16, x17\n\t" "add v3.2d, v3.2d, v9.2d\n\t" "and x12, x12, x9\n\t" "shl v8.2d, v10.2d, #63\n\t" "add x4, x4, x13\n\t" "sri v8.2d, v10.2d, #1\n\t" "eor x12, x12, x11\n\t" "tbl v9.16b, {v10.16b}, v11.16b\n\t" "add x4, x4, x15\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x17, x17, x6\n\t" "ushr v10.2d, v10.2d, #7\n\t" "add x4, x4, x12\n\t" "eor v9.16b, v9.16b, v10.16b\n\t" "add x14, x14, x17\n\t" "add v3.2d, v3.2d, v9.2d\n\t" "add x8, x8, x4\n\t" "add x4, x4, x14\n\t" /* Round 8 */ "mov x13, v4.d[0]\n\t" "ldr x15, [x3], #8\n\t" "ror x12, x8, #14\n\t" "ror x14, x4, #28\n\t" "eor x12, x12, x8, ror 18\n\t" "eor x14, x14, x4, ror 34\n\t" "eor x12, x12, x8, ror 41\n\t" "eor x14, x14, x4, ror 39\n\t" "add x11, x11, x12\n\t" "eor x17, x4, x5\n\t" "eor x12, x9, x10\n\t" "and x16, x17, x16\n\t" "and x12, x12, x8\n\t" "add x11, x11, x13\n\t" "eor x12, x12, x10\n\t" "add x11, x11, x15\n\t" "eor x16, x16, x5\n\t" "add x11, x11, x12\n\t" "add x14, x14, x16\n\t" "add x7, x7, x11\n\t" "add x11, x11, x14\n\t" /* Round 9 */ "mov x13, v4.d[1]\n\t" "ldr x15, [x3], #8\n\t" "ext v10.16b, v4.16b, v5.16b, #8\n\t" "ror x12, x7, #14\n\t" "shl v8.2d, v3.2d, #45\n\t" "ror x14, x11, #28\n\t" "sri v8.2d, v3.2d, #19\n\t" "eor x12, x12, x7, ror 18\n\t" "shl v9.2d, v3.2d, #3\n\t" "eor x14, x14, x11, ror 34\n\t" "sri v9.2d, v3.2d, #61\n\t" "eor x12, x12, x7, ror 41\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x14, x14, x11, ror 39\n\t" "ushr v8.2d, v3.2d, #6\n\t" "add x10, x10, x12\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x16, x11, x4\n\t" "add v4.2d, v4.2d, v9.2d\n\t" "eor x12, x8, x9\n\t" "ext v9.16b, v0.16b, v1.16b, #8\n\t" "and x17, x16, x17\n\t" "add v4.2d, v4.2d, v9.2d\n\t" "and x12, x12, x7\n\t" "shl v8.2d, v10.2d, #63\n\t" "add x10, x10, x13\n\t" "sri v8.2d, v10.2d, #1\n\t" "eor x12, x12, x9\n\t" "tbl v9.16b, {v10.16b}, v11.16b\n\t" "add x10, x10, x15\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x17, x17, x4\n\t" "ushr v10.2d, v10.2d, #7\n\t" "add x10, x10, x12\n\t" "eor v9.16b, v9.16b, v10.16b\n\t" "add x14, x14, x17\n\t" "add v4.2d, v4.2d, v9.2d\n\t" "add x6, x6, x10\n\t" "add x10, x10, x14\n\t" /* Round 10 */ "mov x13, v5.d[0]\n\t" "ldr x15, [x3], #8\n\t" "ror x12, x6, #14\n\t" "ror x14, x10, #28\n\t" "eor x12, x12, x6, ror 18\n\t" "eor x14, x14, x10, ror 34\n\t" "eor x12, x12, x6, ror 41\n\t" "eor x14, x14, x10, ror 39\n\t" "add x9, x9, x12\n\t" "eor x17, x10, x11\n\t" "eor x12, x7, x8\n\t" "and x16, x17, x16\n\t" "and x12, x12, x6\n\t" "add x9, x9, x13\n\t" "eor x12, x12, x8\n\t" "add x9, x9, x15\n\t" "eor x16, x16, x11\n\t" "add x9, x9, x12\n\t" "add x14, x14, x16\n\t" "add x5, x5, x9\n\t" "add x9, x9, x14\n\t" /* Round 11 */ "mov x13, v5.d[1]\n\t" "ldr x15, [x3], #8\n\t" "ext v10.16b, v5.16b, v6.16b, #8\n\t" "ror x12, x5, #14\n\t" "shl v8.2d, v4.2d, #45\n\t" "ror x14, x9, #28\n\t" "sri v8.2d, v4.2d, #19\n\t" "eor x12, x12, x5, ror 18\n\t" "shl v9.2d, v4.2d, #3\n\t" "eor x14, x14, x9, ror 34\n\t" "sri v9.2d, v4.2d, #61\n\t" "eor x12, x12, x5, ror 41\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x14, x14, x9, ror 39\n\t" "ushr v8.2d, v4.2d, #6\n\t" "add x8, x8, x12\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x16, x9, x10\n\t" "add v5.2d, v5.2d, v9.2d\n\t" "eor x12, x6, x7\n\t" "ext v9.16b, v1.16b, v2.16b, #8\n\t" "and x17, x16, x17\n\t" "add v5.2d, v5.2d, v9.2d\n\t" "and x12, x12, x5\n\t" "shl v8.2d, v10.2d, #63\n\t" "add x8, x8, x13\n\t" "sri v8.2d, v10.2d, #1\n\t" "eor x12, x12, x7\n\t" "tbl v9.16b, {v10.16b}, v11.16b\n\t" "add x8, x8, x15\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x17, x17, x10\n\t" "ushr v10.2d, v10.2d, #7\n\t" "add x8, x8, x12\n\t" "eor v9.16b, v9.16b, v10.16b\n\t" "add x14, x14, x17\n\t" "add v5.2d, v5.2d, v9.2d\n\t" "add x4, x4, x8\n\t" "add x8, x8, x14\n\t" /* Round 12 */ "mov x13, v6.d[0]\n\t" "ldr x15, [x3], #8\n\t" "ror x12, x4, #14\n\t" "ror x14, x8, #28\n\t" "eor x12, x12, x4, ror 18\n\t" "eor x14, x14, x8, ror 34\n\t" "eor x12, x12, x4, ror 41\n\t" "eor x14, x14, x8, ror 39\n\t" "add x7, x7, x12\n\t" "eor x17, x8, x9\n\t" "eor x12, x5, x6\n\t" "and x16, x17, x16\n\t" "and x12, x12, x4\n\t" "add x7, x7, x13\n\t" "eor x12, x12, x6\n\t" "add x7, x7, x15\n\t" "eor x16, x16, x9\n\t" "add x7, x7, x12\n\t" "add x14, x14, x16\n\t" "add x11, x11, x7\n\t" "add x7, x7, x14\n\t" /* Round 13 */ "mov x13, v6.d[1]\n\t" "ldr x15, [x3], #8\n\t" "ext v10.16b, v6.16b, v7.16b, #8\n\t" "ror x12, x11, #14\n\t" "shl v8.2d, v5.2d, #45\n\t" "ror x14, x7, #28\n\t" "sri v8.2d, v5.2d, #19\n\t" "eor x12, x12, x11, ror 18\n\t" "shl v9.2d, v5.2d, #3\n\t" "eor x14, x14, x7, ror 34\n\t" "sri v9.2d, v5.2d, #61\n\t" "eor x12, x12, x11, ror 41\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x14, x14, x7, ror 39\n\t" "ushr v8.2d, v5.2d, #6\n\t" "add x6, x6, x12\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x16, x7, x8\n\t" "add v6.2d, v6.2d, v9.2d\n\t" "eor x12, x4, x5\n\t" "ext v9.16b, v2.16b, v3.16b, #8\n\t" "and x17, x16, x17\n\t" "add v6.2d, v6.2d, v9.2d\n\t" "and x12, x12, x11\n\t" "shl v8.2d, v10.2d, #63\n\t" "add x6, x6, x13\n\t" "sri v8.2d, v10.2d, #1\n\t" "eor x12, x12, x5\n\t" "tbl v9.16b, {v10.16b}, v11.16b\n\t" "add x6, x6, x15\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x17, x17, x8\n\t" "ushr v10.2d, v10.2d, #7\n\t" "add x6, x6, x12\n\t" "eor v9.16b, v9.16b, v10.16b\n\t" "add x14, x14, x17\n\t" "add v6.2d, v6.2d, v9.2d\n\t" "add x10, x10, x6\n\t" "add x6, x6, x14\n\t" /* Round 14 */ "mov x13, v7.d[0]\n\t" "ldr x15, [x3], #8\n\t" "ror x12, x10, #14\n\t" "ror x14, x6, #28\n\t" "eor x12, x12, x10, ror 18\n\t" "eor x14, x14, x6, ror 34\n\t" "eor x12, x12, x10, ror 41\n\t" "eor x14, x14, x6, ror 39\n\t" "add x5, x5, x12\n\t" "eor x17, x6, x7\n\t" "eor x12, x11, x4\n\t" "and x16, x17, x16\n\t" "and x12, x12, x10\n\t" "add x5, x5, x13\n\t" "eor x12, x12, x4\n\t" "add x5, x5, x15\n\t" "eor x16, x16, x7\n\t" "add x5, x5, x12\n\t" "add x14, x14, x16\n\t" "add x9, x9, x5\n\t" "add x5, x5, x14\n\t" /* Round 15 */ "mov x13, v7.d[1]\n\t" "ldr x15, [x3], #8\n\t" "ext v10.16b, v7.16b, v0.16b, #8\n\t" "ror x12, x9, #14\n\t" "shl v8.2d, v6.2d, #45\n\t" "ror x14, x5, #28\n\t" "sri v8.2d, v6.2d, #19\n\t" "eor x12, x12, x9, ror 18\n\t" "shl v9.2d, v6.2d, #3\n\t" "eor x14, x14, x5, ror 34\n\t" "sri v9.2d, v6.2d, #61\n\t" "eor x12, x12, x9, ror 41\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x14, x14, x5, ror 39\n\t" "ushr v8.2d, v6.2d, #6\n\t" "add x4, x4, x12\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x16, x5, x6\n\t" "add v7.2d, v7.2d, v9.2d\n\t" "eor x12, x10, x11\n\t" "ext v9.16b, v3.16b, v4.16b, #8\n\t" "and x17, x16, x17\n\t" "add v7.2d, v7.2d, v9.2d\n\t" "and x12, x12, x9\n\t" "shl v8.2d, v10.2d, #63\n\t" "add x4, x4, x13\n\t" "sri v8.2d, v10.2d, #1\n\t" "eor x12, x12, x11\n\t" "tbl v9.16b, {v10.16b}, v11.16b\n\t" "add x4, x4, x15\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x17, x17, x6\n\t" "ushr v10.2d, v10.2d, #7\n\t" "add x4, x4, x12\n\t" "eor v9.16b, v9.16b, v10.16b\n\t" "add x14, x14, x17\n\t" "add v7.2d, v7.2d, v9.2d\n\t" "add x8, x8, x4\n\t" "add x4, x4, x14\n\t" "subs x27, x27, #1\n\t" "bne L_sha512_len_neon_start_%=\n\t" /* Round 0 */ "mov x13, v0.d[0]\n\t" "ldr x15, [x3], #8\n\t" "ror x12, x8, #14\n\t" "ror x14, x4, #28\n\t" "eor x12, x12, x8, ror 18\n\t" "eor x14, x14, x4, ror 34\n\t" "eor x12, x12, x8, ror 41\n\t" "eor x14, x14, x4, ror 39\n\t" "add x11, x11, x12\n\t" "eor x17, x4, x5\n\t" "eor x12, x9, x10\n\t" "and x16, x17, x16\n\t" "and x12, x12, x8\n\t" "add x11, x11, x13\n\t" "eor x12, x12, x10\n\t" "add x11, x11, x15\n\t" "eor x16, x16, x5\n\t" "add x11, x11, x12\n\t" "add x14, x14, x16\n\t" "add x7, x7, x11\n\t" "add x11, x11, x14\n\t" /* Round 1 */ "mov x13, v0.d[1]\n\t" "ldr x15, [x3], #8\n\t" "ror x12, x7, #14\n\t" "ror x14, x11, #28\n\t" "eor x12, x12, x7, ror 18\n\t" "eor x14, x14, x11, ror 34\n\t" "eor x12, x12, x7, ror 41\n\t" "eor x14, x14, x11, ror 39\n\t" "add x10, x10, x12\n\t" "eor x16, x11, x4\n\t" "eor x12, x8, x9\n\t" "and x17, x16, x17\n\t" "and x12, x12, x7\n\t" "add x10, x10, x13\n\t" "eor x12, x12, x9\n\t" "add x10, x10, x15\n\t" "eor x17, x17, x4\n\t" "add x10, x10, x12\n\t" "add x14, x14, x17\n\t" "add x6, x6, x10\n\t" "add x10, x10, x14\n\t" /* Round 2 */ "mov x13, v1.d[0]\n\t" "ldr x15, [x3], #8\n\t" "ror x12, x6, #14\n\t" "ror x14, x10, #28\n\t" "eor x12, x12, x6, ror 18\n\t" "eor x14, x14, x10, ror 34\n\t" "eor x12, x12, x6, ror 41\n\t" "eor x14, x14, x10, ror 39\n\t" "add x9, x9, x12\n\t" "eor x17, x10, x11\n\t" "eor x12, x7, x8\n\t" "and x16, x17, x16\n\t" "and x12, x12, x6\n\t" "add x9, x9, x13\n\t" "eor x12, x12, x8\n\t" "add x9, x9, x15\n\t" "eor x16, x16, x11\n\t" "add x9, x9, x12\n\t" "add x14, x14, x16\n\t" "add x5, x5, x9\n\t" "add x9, x9, x14\n\t" /* Round 3 */ "mov x13, v1.d[1]\n\t" "ldr x15, [x3], #8\n\t" "ror x12, x5, #14\n\t" "ror x14, x9, #28\n\t" "eor x12, x12, x5, ror 18\n\t" "eor x14, x14, x9, ror 34\n\t" "eor x12, x12, x5, ror 41\n\t" "eor x14, x14, x9, ror 39\n\t" "add x8, x8, x12\n\t" "eor x16, x9, x10\n\t" "eor x12, x6, x7\n\t" "and x17, x16, x17\n\t" "and x12, x12, x5\n\t" "add x8, x8, x13\n\t" "eor x12, x12, x7\n\t" "add x8, x8, x15\n\t" "eor x17, x17, x10\n\t" "add x8, x8, x12\n\t" "add x14, x14, x17\n\t" "add x4, x4, x8\n\t" "add x8, x8, x14\n\t" /* Round 4 */ "mov x13, v2.d[0]\n\t" "ldr x15, [x3], #8\n\t" "ror x12, x4, #14\n\t" "ror x14, x8, #28\n\t" "eor x12, x12, x4, ror 18\n\t" "eor x14, x14, x8, ror 34\n\t" "eor x12, x12, x4, ror 41\n\t" "eor x14, x14, x8, ror 39\n\t" "add x7, x7, x12\n\t" "eor x17, x8, x9\n\t" "eor x12, x5, x6\n\t" "and x16, x17, x16\n\t" "and x12, x12, x4\n\t" "add x7, x7, x13\n\t" "eor x12, x12, x6\n\t" "add x7, x7, x15\n\t" "eor x16, x16, x9\n\t" "add x7, x7, x12\n\t" "add x14, x14, x16\n\t" "add x11, x11, x7\n\t" "add x7, x7, x14\n\t" /* Round 5 */ "mov x13, v2.d[1]\n\t" "ldr x15, [x3], #8\n\t" "ror x12, x11, #14\n\t" "ror x14, x7, #28\n\t" "eor x12, x12, x11, ror 18\n\t" "eor x14, x14, x7, ror 34\n\t" "eor x12, x12, x11, ror 41\n\t" "eor x14, x14, x7, ror 39\n\t" "add x6, x6, x12\n\t" "eor x16, x7, x8\n\t" "eor x12, x4, x5\n\t" "and x17, x16, x17\n\t" "and x12, x12, x11\n\t" "add x6, x6, x13\n\t" "eor x12, x12, x5\n\t" "add x6, x6, x15\n\t" "eor x17, x17, x8\n\t" "add x6, x6, x12\n\t" "add x14, x14, x17\n\t" "add x10, x10, x6\n\t" "add x6, x6, x14\n\t" /* Round 6 */ "mov x13, v3.d[0]\n\t" "ldr x15, [x3], #8\n\t" "ror x12, x10, #14\n\t" "ror x14, x6, #28\n\t" "eor x12, x12, x10, ror 18\n\t" "eor x14, x14, x6, ror 34\n\t" "eor x12, x12, x10, ror 41\n\t" "eor x14, x14, x6, ror 39\n\t" "add x5, x5, x12\n\t" "eor x17, x6, x7\n\t" "eor x12, x11, x4\n\t" "and x16, x17, x16\n\t" "and x12, x12, x10\n\t" "add x5, x5, x13\n\t" "eor x12, x12, x4\n\t" "add x5, x5, x15\n\t" "eor x16, x16, x7\n\t" "add x5, x5, x12\n\t" "add x14, x14, x16\n\t" "add x9, x9, x5\n\t" "add x5, x5, x14\n\t" /* Round 7 */ "mov x13, v3.d[1]\n\t" "ldr x15, [x3], #8\n\t" "ror x12, x9, #14\n\t" "ror x14, x5, #28\n\t" "eor x12, x12, x9, ror 18\n\t" "eor x14, x14, x5, ror 34\n\t" "eor x12, x12, x9, ror 41\n\t" "eor x14, x14, x5, ror 39\n\t" "add x4, x4, x12\n\t" "eor x16, x5, x6\n\t" "eor x12, x10, x11\n\t" "and x17, x16, x17\n\t" "and x12, x12, x9\n\t" "add x4, x4, x13\n\t" "eor x12, x12, x11\n\t" "add x4, x4, x15\n\t" "eor x17, x17, x6\n\t" "add x4, x4, x12\n\t" "add x14, x14, x17\n\t" "add x8, x8, x4\n\t" "add x4, x4, x14\n\t" /* Round 8 */ "mov x13, v4.d[0]\n\t" "ldr x15, [x3], #8\n\t" "ror x12, x8, #14\n\t" "ror x14, x4, #28\n\t" "eor x12, x12, x8, ror 18\n\t" "eor x14, x14, x4, ror 34\n\t" "eor x12, x12, x8, ror 41\n\t" "eor x14, x14, x4, ror 39\n\t" "add x11, x11, x12\n\t" "eor x17, x4, x5\n\t" "eor x12, x9, x10\n\t" "and x16, x17, x16\n\t" "and x12, x12, x8\n\t" "add x11, x11, x13\n\t" "eor x12, x12, x10\n\t" "add x11, x11, x15\n\t" "eor x16, x16, x5\n\t" "add x11, x11, x12\n\t" "add x14, x14, x16\n\t" "add x7, x7, x11\n\t" "add x11, x11, x14\n\t" /* Round 9 */ "mov x13, v4.d[1]\n\t" "ldr x15, [x3], #8\n\t" "ror x12, x7, #14\n\t" "ror x14, x11, #28\n\t" "eor x12, x12, x7, ror 18\n\t" "eor x14, x14, x11, ror 34\n\t" "eor x12, x12, x7, ror 41\n\t" "eor x14, x14, x11, ror 39\n\t" "add x10, x10, x12\n\t" "eor x16, x11, x4\n\t" "eor x12, x8, x9\n\t" "and x17, x16, x17\n\t" "and x12, x12, x7\n\t" "add x10, x10, x13\n\t" "eor x12, x12, x9\n\t" "add x10, x10, x15\n\t" "eor x17, x17, x4\n\t" "add x10, x10, x12\n\t" "add x14, x14, x17\n\t" "add x6, x6, x10\n\t" "add x10, x10, x14\n\t" /* Round 10 */ "mov x13, v5.d[0]\n\t" "ldr x15, [x3], #8\n\t" "ror x12, x6, #14\n\t" "ror x14, x10, #28\n\t" "eor x12, x12, x6, ror 18\n\t" "eor x14, x14, x10, ror 34\n\t" "eor x12, x12, x6, ror 41\n\t" "eor x14, x14, x10, ror 39\n\t" "add x9, x9, x12\n\t" "eor x17, x10, x11\n\t" "eor x12, x7, x8\n\t" "and x16, x17, x16\n\t" "and x12, x12, x6\n\t" "add x9, x9, x13\n\t" "eor x12, x12, x8\n\t" "add x9, x9, x15\n\t" "eor x16, x16, x11\n\t" "add x9, x9, x12\n\t" "add x14, x14, x16\n\t" "add x5, x5, x9\n\t" "add x9, x9, x14\n\t" /* Round 11 */ "mov x13, v5.d[1]\n\t" "ldr x15, [x3], #8\n\t" "ror x12, x5, #14\n\t" "ror x14, x9, #28\n\t" "eor x12, x12, x5, ror 18\n\t" "eor x14, x14, x9, ror 34\n\t" "eor x12, x12, x5, ror 41\n\t" "eor x14, x14, x9, ror 39\n\t" "add x8, x8, x12\n\t" "eor x16, x9, x10\n\t" "eor x12, x6, x7\n\t" "and x17, x16, x17\n\t" "and x12, x12, x5\n\t" "add x8, x8, x13\n\t" "eor x12, x12, x7\n\t" "add x8, x8, x15\n\t" "eor x17, x17, x10\n\t" "add x8, x8, x12\n\t" "add x14, x14, x17\n\t" "add x4, x4, x8\n\t" "add x8, x8, x14\n\t" /* Round 12 */ "mov x13, v6.d[0]\n\t" "ldr x15, [x3], #8\n\t" "ror x12, x4, #14\n\t" "ror x14, x8, #28\n\t" "eor x12, x12, x4, ror 18\n\t" "eor x14, x14, x8, ror 34\n\t" "eor x12, x12, x4, ror 41\n\t" "eor x14, x14, x8, ror 39\n\t" "add x7, x7, x12\n\t" "eor x17, x8, x9\n\t" "eor x12, x5, x6\n\t" "and x16, x17, x16\n\t" "and x12, x12, x4\n\t" "add x7, x7, x13\n\t" "eor x12, x12, x6\n\t" "add x7, x7, x15\n\t" "eor x16, x16, x9\n\t" "add x7, x7, x12\n\t" "add x14, x14, x16\n\t" "add x11, x11, x7\n\t" "add x7, x7, x14\n\t" /* Round 13 */ "mov x13, v6.d[1]\n\t" "ldr x15, [x3], #8\n\t" "ror x12, x11, #14\n\t" "ror x14, x7, #28\n\t" "eor x12, x12, x11, ror 18\n\t" "eor x14, x14, x7, ror 34\n\t" "eor x12, x12, x11, ror 41\n\t" "eor x14, x14, x7, ror 39\n\t" "add x6, x6, x12\n\t" "eor x16, x7, x8\n\t" "eor x12, x4, x5\n\t" "and x17, x16, x17\n\t" "and x12, x12, x11\n\t" "add x6, x6, x13\n\t" "eor x12, x12, x5\n\t" "add x6, x6, x15\n\t" "eor x17, x17, x8\n\t" "add x6, x6, x12\n\t" "add x14, x14, x17\n\t" "add x10, x10, x6\n\t" "add x6, x6, x14\n\t" /* Round 14 */ "mov x13, v7.d[0]\n\t" "ldr x15, [x3], #8\n\t" "ror x12, x10, #14\n\t" "ror x14, x6, #28\n\t" "eor x12, x12, x10, ror 18\n\t" "eor x14, x14, x6, ror 34\n\t" "eor x12, x12, x10, ror 41\n\t" "eor x14, x14, x6, ror 39\n\t" "add x5, x5, x12\n\t" "eor x17, x6, x7\n\t" "eor x12, x11, x4\n\t" "and x16, x17, x16\n\t" "and x12, x12, x10\n\t" "add x5, x5, x13\n\t" "eor x12, x12, x4\n\t" "add x5, x5, x15\n\t" "eor x16, x16, x7\n\t" "add x5, x5, x12\n\t" "add x14, x14, x16\n\t" "add x9, x9, x5\n\t" "add x5, x5, x14\n\t" /* Round 15 */ "mov x13, v7.d[1]\n\t" "ldr x15, [x3], #8\n\t" "ror x12, x9, #14\n\t" "ror x14, x5, #28\n\t" "eor x12, x12, x9, ror 18\n\t" "eor x14, x14, x5, ror 34\n\t" "eor x12, x12, x9, ror 41\n\t" "eor x14, x14, x5, ror 39\n\t" "add x4, x4, x12\n\t" "eor x16, x5, x6\n\t" "eor x12, x10, x11\n\t" "and x17, x16, x17\n\t" "and x12, x12, x9\n\t" "add x4, x4, x13\n\t" "eor x12, x12, x11\n\t" "add x4, x4, x15\n\t" "eor x17, x17, x6\n\t" "add x4, x4, x12\n\t" "add x14, x14, x17\n\t" "add x8, x8, x4\n\t" "add x4, x4, x14\n\t" "add x11, x11, x26\n\t" "add x10, x10, x25\n\t" "add x9, x9, x24\n\t" "add x8, x8, x23\n\t" "add x7, x7, x22\n\t" "add x6, x6, x21\n\t" "add x5, x5, x20\n\t" "add x4, x4, x19\n\t" "adr x3, %[L_SHA512_transform_neon_len_k]\n\t" "subs %w[len], %w[len], #0x80\n\t" "bne L_sha512_len_neon_begin_%=\n\t" "stp x4, x5, [%x[sha512]]\n\t" "stp x6, x7, [%x[sha512], #16]\n\t" "stp x8, x9, [%x[sha512], #32]\n\t" "stp x10, x11, [%x[sha512], #48]\n\t" "ldp x29, x30, [sp], #16\n\t" : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len) : [L_SHA512_transform_neon_len_k] "S" (L_SHA512_transform_neon_len_k), [L_SHA512_transform_neon_len_ror8] "S" (L_SHA512_transform_neon_len_ror8) : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11" ); } #endif /* WOLFSSL_ARMASM */ #endif /* __aarch64__ */